NCBI C++ ToolKit
genref.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: genref.cpp 102064 2024-03-25 15:10:22Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * File Name: genref.cpp
27  *
28  * Author: Karl Sirotkin, Hsiu-Chuan Chen
29  *
30  * File Description:
31  * Parse gene qualifiers.
32  *
33  */
34 
35 #include <ncbi_pch.hpp>
36 
37 #include "ftacpp.hpp"
38 
39 #include "index.h"
40 
43 
44 #include "ftaerr.hpp"
45 #include "utilfeat.h"
46 #include "add.h"
47 #include "nucprot.h"
48 
49 #include <objmgr/bioseq_ci.hpp>
50 #include <objmgr/scope.hpp>
59 
60 #include "asci_blk.h"
61 #include "utilfun.h"
62 
63 #ifdef THIS_FILE
64 # undef THIS_FILE
65 #endif
66 #define THIS_FILE "genref.cpp"
67 
70 
71 struct SeqLocInfo {
74 };
75 
76 typedef list<SeqLocInfo> TSeqLocInfoList;
77 typedef std::set<string> TSynSet;
78 typedef std::set<string> TWormbaseSet;
79 typedef std::set<string> TLocusTagSet;
80 
81 struct AccMinMax {
83 
84  // string acc;
85  // Int2 ver = INT2_MIN;
86  Int4 min = -1;
87  Int4 max = -1;
88 };
89 
91 
92 struct GeneLocs {
93  string gene;
94  string locus;
98  list<AccMinMax> ammp;
100 };
101 
103 
104 
106  Int4 from = 0; /* the lowest value in the entry */
107  Int4 to = 0; /* the highest value in the entry */
109  Int4 length = 0; /* total length of the seq-data of the entry */
110  TSeqIdList ids; /* the entry's SeqId */
111  bool noleft = false;
112  bool noright = false;
113 };
114 
116 
117 struct MixLoc {
119  Int4 min = 0;
120  Int4 max = 0;
122  bool noleft = false;
123  bool noright = false;
125  MixLoc* next = nullptr;
126 };
127 
128 using MixLocPtr = MixLoc*;
129 
130 struct GeneList {
131  string locus; /* the name of the gene,
132  copy the value */
133  string locus_tag;
134  string pseudogene;
135  string maploc; /* the map of the gene,
136  copy the value */
137 
139 
140  SeqlocInfoblkPtr slibp = nullptr; /* the location, points to the value */
141  Int4 segnum = 0; /* segment number */
142  Uint1 leave = 0; /* TRUE for aka tRNAs */
143 
145 
146  MixLocPtr mlp = nullptr;
147 
150  TLocusTagSet olt; /* /old_locus_tag values */
151 
152  bool pseudo = false;
153  bool allpseudo = false;
154  bool genefeat = false;
155  bool noleft = false;
156  bool noright = false;
157  string fname;
158  string location;
159  bool todel = false;
160  bool circular = false;
161  GeneList* next = nullptr;
162 };
163 
165 
166 struct CdssList {
171 };
172 
174 
175 struct GeneNode {
176  bool flag; /* TRUE, if a level has been found
177  to put GeneRefPtr */
178  bool seg; /* TRUE, if this is a segment set
179  entries */
182 
183  GeneListPtr glp; /* a list of gene infomation for
184  the entries */
185  TSeqFeatList feats; /* a list which contains the
186  GeneRefPtr only */
187  Int4 segindex; /* total segments in this set */
188  bool accver; /* for ACCESSION.VERSION */
189  bool skipdiv; /* skip BCT and SYN divisions */
191  bool circular;
194  bool got_misc; /* TRUE if there is a misc_feature
195  with gene or/and locus_tag */
196 
198  flag(false),
199  seg(false),
200  bioseq(nullptr),
202  glp(nullptr),
203  segindex(0),
204  accver(false),
205  skipdiv(false),
206  clp(nullptr),
207  circular(false),
208  gelop(nullptr),
210  got_misc(false)
211  {
212  }
213 };
214 
216 
/* The list of features which are not allowed to have a /gene qualifier
 */
219 const char* feat_no_gene[] = { "gap", "operon", "source", nullptr };
220 
221 const char* leave_imp_feat[] = {
222  "LTR",
223  "conflict",
224  "rep_origin",
225  "repeat_region",
226  "satellite",
227  nullptr
228 };
229 
231  3, /* tRNA */
232  4, /* rRNA */
233  5, /* snRNA */
234  6, /* scRNA */
235  -1
236 };
237 
238 /**********************************************************/
239 static void GetLocationStr(const CSeq_loc& loc, string& str)
240 {
241  loc.GetLabel(&str);
243 }
244 
245 /**********************************************************/
246 static bool fta_seqid_same(const CSeq_id& sid, const Char* acnum, const CSeq_id* id)
247 {
248  if (id)
249  return sid.Compare(*id) == CSeq_id::e_YES;
250 
251  if (! acnum)
252  return true;
253 
254  auto id_string = sid.GetSeqIdString();
255  return NStr::EqualCase(id_string, acnum);
256  /*
257  if (! sid.IsGenbank() && ! sid.IsEmbl() && ! sid.IsDdbj() &&
258  ! sid.IsPir() && ! sid.IsSwissprot() && ! sid.IsOther() &&
259  ! sid.IsPrf() && ! sid.IsTpg() && ! sid.IsTpd() &&
260  ! sid.IsTpe() && ! sid.IsGpipe())
261  return false;
262 
263  const CTextseq_id* text_id = sid.GetTextseq_Id();
264  if (! text_id || ! text_id->IsSetAccession() ||
265  text_id->GetAccession() != acnum)
266  return false;
267 
268  return true;
269  */
270 }
271 
272 /**********************************************************/
273 static void fta_seqloc_del_far(CSeq_loc& locs, const Char* acnum, const CSeq_id* id)
274 {
275  vector<CConstRef<CSeq_loc>> to_remove;
276 
277  for (CSeq_loc_CI ci(locs, CSeq_loc_CI::eEmpty_Allow); ci != locs.end(); ++ci) {
278  CConstRef<CSeq_loc> cur_loc = ci.GetRangeAsSeq_loc();
279  if (cur_loc->IsWhole()) {
280  if (fta_seqid_same(*cur_loc->GetId(), acnum, id))
281  continue;
282  } else if (cur_loc->IsInt()) {
283  if (fta_seqid_same(cur_loc->GetInt().GetId(), acnum, id))
284  continue;
285  } else if (cur_loc->IsPnt()) {
286  if (fta_seqid_same(cur_loc->GetPnt().GetId(), acnum, id))
287  continue;
288  } else if (cur_loc->IsBond()) {
289  if (cur_loc->GetBond().IsSetA() &&
290  fta_seqid_same(cur_loc->GetBond().GetA().GetId(), acnum, id))
291  continue;
292  } else if (cur_loc->IsPacked_pnt()) {
293  if (fta_seqid_same(cur_loc->GetPacked_pnt().GetId(), acnum, id))
294  continue;
295  }
296 
297  to_remove.push_back(cur_loc);
298  }
299 
300  for (vector<CConstRef<CSeq_loc>>::const_iterator it = to_remove.begin(); it != to_remove.end(); ++it)
301  locs.Assign(*locs.Subtract(*(*it), 0, nullptr, nullptr));
302 
303  for (CTypeIterator<CSeq_bond> bond(locs); bond; ++bond) {
304  if (bond->IsSetB() && ! fta_seqid_same(bond->GetB().GetId(), acnum, id))
305  bond->ResetB();
306  }
307 }
308 
309 
310 /**********************************************************/
311 static CRef<CSeq_loc> fta_seqloc_local(const CSeq_loc& orig, const Char* acnum)
312 {
313  CRef<CSeq_loc> ret(new CSeq_loc);
314  ret->Assign(orig);
315 
316  if (acnum && *acnum != '\0' && *acnum != ' ')
317  fta_seqloc_del_far(*ret, acnum, nullptr);
318 
319  return ret;
320 }
321 
322 /**********************************************************/
323 static Int4 fta_cmp_gene_syns(const TSynSet& syn1, const TSynSet& syn2)
324 {
325  Int4 i = 0;
326 
327  TSynSet::const_iterator it1 = syn1.begin(),
328  it2 = syn2.begin();
329 
330  for (; it1 != syn1.end() && it2 != syn2.end(); ++it1, ++it2) {
331  i = NStr::CompareNocase(*it1, *it2);
332  if (i != 0)
333  break;
334  }
335 
336  if (it1 == syn1.end() && it2 == syn2.end())
337  return (0);
338  if (it1 == syn1.end())
339  return (-1);
340  if (it2 == syn2.end())
341  return (1);
342  return (i);
343 }
344 
345 
346 /**********************************************************/
347 static Int4 fta_cmp_gene_refs(const CGene_ref& grp1, const CGene_ref& grp2)
348 {
349  Int4 res = 0;
350 
351  TSynSet syn1,
352  syn2;
353 
354  if (grp1.IsSetSyn())
355  syn1.insert(grp1.GetSyn().begin(), grp1.GetSyn().end());
356  if (grp2.IsSetSyn())
357  syn2.insert(grp2.GetSyn().begin(), grp2.GetSyn().end());
358 
359  if (! grp1.IsSetLocus() && ! grp2.IsSetLocus()) {
360  res = fta_cmp_gene_syns(syn1, syn2);
361  if (res != 0)
362  return (res);
363  return (NStr::CompareNocase(grp1.IsSetLocus_tag() ? grp1.GetLocus_tag() : "",
364  grp2.IsSetLocus_tag() ? grp2.GetLocus_tag() : ""));
365  }
366 
367  if (! grp1.IsSetLocus())
368  return (-1);
369  if (! grp2.IsSetLocus())
370  return (1);
371 
372  res = NStr::CompareNocase(grp1.GetLocus(), grp2.GetLocus());
373  if (res != 0)
374  return (res);
375 
376  res = fta_cmp_gene_syns(syn1, syn2);
377  if (res != 0)
378  return (res);
379 
380  return (NStr::CompareNocase(grp1.IsSetLocus_tag() ? grp1.GetLocus_tag() : "",
381  grp2.IsSetLocus_tag() ? grp2.GetLocus_tag() : ""));
382 }
383 
384 /**********************************************************/
386 {
387  Int4 res;
388 
389  if (! glp1 && ! glp2)
390  return (0);
391  if (! glp1)
392  return (-1);
393  if (! glp2)
394  return (1);
395 
396  if (glp1->locus.empty() && glp2->locus.empty()) {
397  res = fta_cmp_gene_syns(glp1->syn, glp2->syn);
398  if (res != 0)
399  return (res);
400  return (NStr::CompareNocase(glp1->locus_tag, glp2->locus_tag));
401  }
402  if (glp1->locus.empty())
403  return (-1);
404  if (glp2->locus.empty())
405  return (1);
406 
407  res = NStr::CompareNocase(glp1->locus, glp2->locus);
408  if (res != 0)
409  return (res);
410  res = fta_cmp_gene_syns(glp1->syn, glp2->syn);
411  if (res != 0)
412  return (res);
413  return (NStr::CompareNocase(glp1->locus_tag, glp2->locus_tag));
414 }
415 
416 /**********************************************************/
417 static bool CompareGeneListName(const GeneListPtr& sp1, const GeneListPtr& sp2)
418 {
419  SeqlocInfoblkPtr slip1 = sp1->slibp;
420  SeqlocInfoblkPtr slip2 = sp2->slibp;
421 
422  Int4 status = sp1->segnum - sp2->segnum;
423  if (status != 0)
424  return status < 0;
425 
426  status = fta_cmp_locusyn(sp1, sp2);
427  if (status != 0)
428  return status < 0;
429 
430  status = slip1->strand - slip2->strand;
431  if (status != 0)
432  return status < 0;
433 
434  status = (Int4)(sp1->leave - sp2->leave);
435  if (status != 0)
436  return status < 0;
437 
438  status = slip1->from - slip2->from;
439  if (status != 0)
440  return status < 0;
441 
442  status = slip2->noleft - slip1->noleft;
443  if (status != 0)
444  return status < 0;
445 
446  status = slip1->to - slip2->to;
447  if (status != 0)
448  return status < 0;
449 
450  return slip1->noright < slip2->noright;
451 }
452 
453 /*static int LIBCALLBACK CompareGeneListName(void PNTR vp1, void PNTR vp2)
454 {
455  GeneListPtr PNTR sp1 = (GeneListPtr PNTR) vp1;
456  GeneListPtr PNTR sp2 = (GeneListPtr PNTR) vp2;
457  SeqlocInfoblkPtr slip1;
458  SeqlocInfoblkPtr slip2;
459  Int4 status;
460 
461  slip1 = (*sp1)->slibp;
462  slip2 = (*sp2)->slibp;
463 
464  status = (*sp1)->segnum - (*sp2)->segnum;
465  if (status != 0)
466  return(status);
467 
468  status = fta_cmp_locusyn(*sp1, *sp2);
469  if (status != 0)
470  return(status);
471 
472  status = slip1->strand - slip2->strand;
473  if (status != 0)
474  return(status);
475 
476  status = (Int4) ((*sp1)->leave - (*sp2)->leave);
477  if (status != 0)
478  return(status);
479 
480  status = slip1->from - slip2->from;
481  if (status != 0)
482  return(status);
483 
484  status = (Int4) (slip2->noleft - slip1->noleft);
485  if (status != 0)
486  return(status);
487 
488  status = slip1->to - slip2->to;
489  if (status != 0)
490  return(status);
491 
492  return((Int4) (slip1->noright - slip2->noright));
493 }*/
494 
495 /**********************************************************/
497 {
498  Int4 index;
499  Int4 total;
500 
501  GeneListPtr glp;
502 
503  total = 0;
504  for (glp = gnp->glp; glp; glp = glp->next)
505  total++;
506 
507  vector<GeneListPtr> temp(total);
508 
509  for (index = 0, glp = gnp->glp; glp; glp = glp->next)
510  temp[index++] = glp;
511 
512  std::sort(temp.begin(), temp.end(), CompareGeneListName);
513 
514  gnp->glp = glp = temp[0];
515  for (index = 0; index < total - 1; glp = glp->next, index++)
516  glp->next = temp[index + 1];
517 
518  glp = temp[total - 1];
519  glp->next = nullptr;
520 
521  return (gnp);
522 }
523 
524 
525 /**********************************************************/
526 static void MixLocFree(MixLocPtr mlp)
527 {
528  MixLocPtr next;
529 
530  for (; mlp; mlp = next) {
531  next = mlp->next;
532  delete mlp;
533  }
534 }
535 
536 /**********************************************************/
537 static void GeneListFree(GeneListPtr glp)
538 {
539  GeneListPtr glpnext;
540 
541  for (; glp; glp = glpnext) {
542  glpnext = glp->next;
543  glp->next = nullptr;
544 
545  // if (glp->locus)
546  // MemFree(glp->locus);
547 
548  // if (glp->locus_tag)
549  // MemFree(glp->locus_tag);
550 
551  // if (glp->pseudogene)
552  // MemFree(glp->pseudogene);
553 
554  // if (glp->maploc)
555  // MemFree(glp->maploc);
556 
557  if (glp->slibp) {
558  delete glp->slibp;
559  }
560  if (glp->mlp)
561  MixLocFree(glp->mlp);
562 
563  // if (glp->fname)
564  // MemFree(glp->fname);
565 
566  // if (glp->location)
567  // MemFree(glp->location);
568 
569  delete glp;
570  }
571 }
572 
573 /**********************************************************/
574 static void CdssListFree(CdssListPtr clp)
575 {
576  CdssListPtr clpnext;
577 
578  for (; clp; clp = clpnext) {
579  clpnext = clp->next;
580  delete clp;
581  }
582 }
583 
584 
585 /**********************************************************/
586 static void GeneLocsFree(GeneLocsPtr gelop)
587 {
588  GeneLocsPtr gelopnext;
589 
590  for (; gelop; gelop = gelopnext) {
591  gelopnext = gelop->next;
592  delete gelop;
593  }
594 }
595 
596 /**********************************************************
597  *
598  * GetLowHighFromSeqLoc:
599  * -- get the lowest "from", highest "to" value, strand
600  * value within one feature key
601  *
602  **********************************************************/
603 static SeqlocInfoblkPtr GetLowHighFromSeqLoc(const CSeq_loc* origslp, Int4 length, const CSeq_id& orig_id)
604 {
605  SeqlocInfoblkPtr slibp = nullptr;
606 
607  Int4 from;
608  Int4 to;
609 
610  ENa_strand strand;
611 
612  bool noleft;
613  bool noright;
614 
615  if (origslp) {
616  for (CSeq_loc_CI loc(*origslp); loc; ++loc) {
617  noleft = false;
618  noright = false;
619 
620  CConstRef<CSeq_loc> cur_loc = loc.GetRangeAsSeq_loc();
621  const CSeq_id* id = nullptr;
622 
623  if (cur_loc->IsInt()) {
624  const CSeq_interval& interval = cur_loc->GetInt();
625  id = &interval.GetId();
626 
627  from = interval.GetFrom();
628  to = interval.GetTo();
629  strand = interval.IsSetStrand() ? interval.GetStrand() : eNa_strand_unknown;
630 
631  if (interval.IsSetFuzz_from() && interval.GetFuzz_from().IsLim() &&
632  interval.GetFuzz_from().GetLim() == CInt_fuzz::eLim_lt)
633  noleft = true;
634  if (interval.IsSetFuzz_to() && interval.GetFuzz_to().IsLim() &&
635  interval.GetFuzz_to().GetLim() == CInt_fuzz::eLim_gt)
636  noright = true;
637  } else if (cur_loc->IsPnt()) {
638  const CSeq_point& point = cur_loc->GetPnt();
639  id = &point.GetId();
640  from = point.GetPoint();
641  to = from;
642  strand = point.IsSetStrand() ? point.GetStrand() : eNa_strand_unknown;
643 
644  if (point.IsSetFuzz() && point.GetFuzz().IsLim()) {
645  if (point.GetFuzz().GetLim() == CInt_fuzz::eLim_gt)
646  noright = true;
647  else if (point.GetFuzz().GetLim() == CInt_fuzz::eLim_lt)
648  noleft = true;
649  }
650  } else
651  continue;
652 
653  /* get low and high only for locations from the same entry */
654  if (from < 0 || to < 0 || ! id || orig_id.Compare(*id) == CSeq_id::e_NO)
655  continue;
656 
657  if (! slibp) {
658  slibp = new SeqlocInfoblk;
659  slibp->from = from;
660  slibp->to = to;
661  slibp->noleft = noleft;
662  slibp->noright = noright;
663  if (length != -99) {
664  slibp->strand = strand;
665 
666  CRef<CSeq_id> sid(new CSeq_id);
667  sid->Assign(*id);
668  slibp->ids.push_back(sid);
669  slibp->length = length; /* total bsp of the entry */
670  }
671  } else {
672  if (slibp->from > from) {
673  slibp->from = from;
674  slibp->noleft = noleft;
675  }
676  if (slibp->to < to) {
677  slibp->to = to;
678  slibp->noright = noright;
679  }
680  }
681  }
682  }
683 
684  return (slibp);
685 }
686 
687 /**********************************************************/
689 {
690  SeqlocInfoblkPtr slp;
692  GeneListPtr glp;
693 
694  first = c->slibp;
695  if (! gnp || first->to >= second->from)
696  return false;
697 
698  for (glp = gnp->glp; glp; glp = glp->next) {
699  if (c->segnum != glp->segnum || ! glp->slibp)
700  continue;
701 
702  slp = glp->slibp;
703  if (gnp->skipdiv == false) {
704  if (glp->leave != 0 || slp->strand != first->strand ||
705  slp->from < first->from || slp->to > second->to ||
706  fta_cmp_locusyn(glp, c) == 0)
707  continue;
708  break;
709  }
710 
711  if (slp->to <= first->to || slp->from >= second->from ||
712  slp->strand != first->strand || fta_cmp_locusyn(glp, c) == 0)
713  continue;
714  break;
715  }
716 
717  return glp != nullptr;
718 }
719 
720 /**********************************************************/
722 {
723  SeqlocInfoblkPtr cloc;
724 
725  cloc = c->slibp;
726  if (cloc->to >= to)
727  return false;
728  for (; clp; clp = clp->next)
729  if (c->segnum == clp->segnum && clp->from > cloc->to && clp->to < to)
730  break;
731 
732  return clp != nullptr;
733 }
734 
735 /**********************************************************/
736 static void AddGeneFeat(GeneListPtr glp, const string& maploc, TSeqFeatList& feats)
737 {
738  CRef<CSeq_feat> feat(new CSeq_feat);
739  CGene_ref& gene_ref = feat->SetData().SetGene();
740 
741  if (! glp->locus.empty()) {
742  gene_ref.SetLocus(glp->locus);
743  }
744  if (! glp->locus_tag.empty()) {
745  gene_ref.SetLocus_tag(glp->locus_tag);
746  }
747  if (! maploc.empty()) {
748  gene_ref.SetMaploc(maploc);
749  }
750 
751  if (! glp->syn.empty()) {
752  gene_ref.SetSyn().assign(glp->syn.begin(), glp->syn.end());
753  glp->syn.clear();
754  }
755 
756  if (glp->loc.NotEmpty())
757  feat->SetLocation(*glp->loc);
758 
759  if (glp->pseudo)
760  feat->SetPseudo(true);
761 
762  if (glp->allpseudo)
763  feat->SetPseudo(true);
764 
765  if (! glp->pseudogene.empty() && glp->pseudogene[0] != '\0') {
766  CRef<CGb_qual> qual(new CGb_qual);
767  qual->SetQual("pseudogene");
768  qual->SetVal(glp->pseudogene);
769 
770  feat->SetQual().push_back(qual);
771  feat->SetPseudo(true);
772  }
773 
774  if (! glp->wormbase.empty()) {
775  if (glp->wormbase.size() > 1)
776  ErrPostEx(SEV_WARNING, ERR_FEATURE_MultipleWBGeneXrefs, "Multiple WormBase WBGene /db_xref qualifiers found for feature with Gene Symbol \"%s\" and Locus Tag \"%s\".", (glp->locus.empty()) ? "NONE" : glp->locus.c_str(), (glp->locus_tag.empty()) ? "NONE" : glp->locus_tag.c_str());
777 
778  for (TWormbaseSet::const_iterator it = glp->wormbase.begin(); it != glp->wormbase.end(); ++it) {
779  if (it->empty())
780  continue;
781 
782  CRef<CDbtag> tag(new CDbtag);
783 
784  tag->SetDb("WormBase");
785  tag->SetTag().SetStr(*it);
786 
787  feat->SetDbxref().push_back(tag);
788  }
789  }
790 
791  if (! glp->olt.empty()) {
792  if (glp->olt.size() > 1)
793  ErrPostEx(SEV_WARNING, ERR_FEATURE_MultipleOldLocusTags, "Multiple /old_locus_tag qualifiers found for feature with Gene Symbol \"%s\" and Locus Tag \"%s\".", (glp->locus.empty()) ? "NONE" : glp->locus.c_str(), (glp->locus_tag.empty()) ? "NONE" : glp->locus_tag.c_str());
794 
795  for (TLocusTagSet::const_iterator it = glp->olt.begin(); it != glp->olt.end(); ++it) {
796  if (it->empty())
797  continue;
798 
799  CRef<CGb_qual> qual(new CGb_qual);
800  qual->SetQual("old_locus_tag");
801  qual->SetVal(*it);
802 
803  feat->SetQual().push_back(qual);
804  }
805  }
806 
807  glp->loc.Reset();
808  feats.push_back(feat);
809 }
810 
811 /**********************************************************/
813 {
814  MixLocPtr res = new MixLoc();
815  res->pId = mlp->pId;
816  res->min = mlp->min;
817  res->max = mlp->max;
818  res->strand = mlp->strand;
819  res->noleft = mlp->noleft;
820  res->noright = mlp->noright;
821  res->numint = mlp->numint;
822  return res;
823 }
824 /**********************************************************/
825 
826 static bool s_IdsMatch(const CRef<CSeq_id>& pId1, const CRef<CSeq_id>& pId2)
827 {
828  if (! pId1 && ! pId2) {
829  return true;
830  }
831 
832  if (! pId1 || ! pId2) {
833  return false;
834  }
835 
836  return (pId1->Compare(*pId2) == CSeq_id::e_YES);
837 }
838 
839 /**********************************************************/
841 {
842  MixLocPtr mlp;
843  MixLocPtr res;
844  MixLocPtr tres;
845  MixLocPtr next;
846  MixLocPtr prev;
847  MixLocPtr ttt;
848  Int2 got;
849 
850  if (! first && ! second)
851  return nullptr;
852 
853  tres = new MixLoc;
854  res = tres;
855  mlp = first ? first : second;
856  for (; mlp; mlp = mlp->next) {
857  res->next = MixLocCopy(mlp);
858  res = res->next;
859  }
860  if (first && second) {
861  for (mlp = second; mlp; mlp = mlp->next) {
862  next = MixLocCopy(mlp);
863  for (res = tres->next; res; res = res->next) {
864  if (! s_IdsMatch(res->pId, next->pId) ||
865  res->strand != next->strand)
866  continue;
867 
868  ttt = res->next;
869  res->next = next;
870  next->next = ttt;
871  break;
872  }
873  if (res)
874  continue;
875 
876  for (prev = tres; prev->next; prev = prev->next) {
877  ttt = prev->next;
878  for (res = mlp->next; res; res = res->next) {
879  if (s_IdsMatch(res->pId, ttt->pId) &&
880  res->strand == ttt->strand)
881  break;
882  }
883  if (res)
884  break;
885  }
886  ttt = prev->next;
887  prev->next = next;
888  next->next = ttt;
889  }
890  }
891 
892  res = tres->next;
893  delete tres;
894  if (! res)
895  return nullptr;
896 
897  for (got = 1; got == 1;) {
898  got = 0;
899  for (tres = res; tres; tres = tres->next) {
900  if (! tres->pId)
901  continue;
902  for (mlp = tres->next; mlp; mlp = mlp->next) {
903  if (! mlp->pId ||
904  ! s_IdsMatch(tres->pId, mlp->pId) ||
905  tres->strand != mlp->strand)
906  continue;
907 
908  if (tres->min == mlp->min && tres->max == mlp->max) {
909  mlp->pId.Reset();
910  if (tres->noleft == false)
911  tres->noleft = mlp->noleft;
912  if (tres->noright == false)
913  tres->noright = mlp->noright;
914  got = 1;
915  continue;
916  }
917 
918  if (join == false ||
919  (tres->min <= mlp->max + 1 && tres->max + 1 >= mlp->min)) {
920  if (tres->min == mlp->min) {
921  if (tres->noleft == false)
922  tres->noleft = mlp->noleft;
923  } else if (tres->min > mlp->min) {
924  tres->min = mlp->min;
925  tres->noleft = mlp->noleft;
926  }
927 
928  if (tres->max == mlp->max) {
929  if (tres->noright == false)
930  tres->noright = mlp->noright;
931  } else if (tres->max < mlp->max) {
932  tres->max = mlp->max;
933  tres->noright = mlp->noright;
934  }
935  mlp->pId.Reset();
936  got = 1;
937  }
938  }
939  }
940  }
941  for (mlp = nullptr, tres = res; tres; tres = next) {
942  next = tres->next;
943  if (tres->pId) {
944  mlp = tres;
945  continue;
946  }
947  if (! mlp)
948  res = tres->next;
949  else
950  mlp->next = tres->next;
951  tres->next = nullptr;
952  MixLocFree(tres);
953  }
954  return (res);
955 }
956 
957 /**********************************************************/
959 {
960  if (! first && ! second)
961  return nullptr;
962 
963  MixLocPtr tres = new MixLoc,
964  res = tres;
965  for (MixLocPtr mlp = first; mlp; mlp = mlp->next) {
966  res->next = MixLocCopy(mlp);
967  res = res->next;
968  }
969  for (MixLocPtr mlp = second; mlp; mlp = mlp->next) {
970  res->next = MixLocCopy(mlp);
971  res = res->next;
972  }
973 
974  res = tres->next;
975  delete tres;
976  return (res);
977 }
978 
979 /**********************************************************/
980 static void fta_add_wormbase(GeneListPtr fromglp, GeneListPtr toglp)
981 {
982  toglp->wormbase.insert(fromglp->wormbase.begin(), fromglp->wormbase.end());
983  fromglp->wormbase.clear();
984 }
985 
986 /**********************************************************/
987 static void fta_add_olt(GeneListPtr fromglp, GeneListPtr toglp)
988 {
989  toglp->olt.insert(fromglp->olt.begin(), fromglp->olt.end());
990  fromglp->olt.clear();
991 }
992 
993 /**********************************************************/
995 {
996  if (! tglp || ! glp)
997  return;
998 
999  if (tglp->pseudogene.empty() && glp->pseudogene.empty())
1000  return;
1001 
1002  if (! tglp->pseudogene.empty() && ! glp->pseudogene.empty()) {
1003  if (tglp->pseudogene[0] == '\0' || glp->pseudogene[0] == '\0') {
1004  tglp->pseudogene[0] = '\0';
1005  glp->pseudogene[0] = '\0';
1006  } else if (tglp->pseudogene != glp->pseudogene) {
1007  ErrPostEx(SEV_ERROR, ERR_FEATURE_InconsistentPseudogene, "All /pseudogene qualifiers for a given Gene and/or Locus-Tag should be uniform. But pseudogenes \"%s\" vs. \"%s\" exist for the features with Gene Symbol \"%s\" and Locus Tag \"%s\".", (glp->locus.empty()) ? "NONE" : glp->locus.c_str(), (glp->locus_tag.empty()) ? "NONE" : glp->locus_tag.c_str(), tglp->pseudogene.c_str(), glp->pseudogene.c_str());
1008  tglp->pseudogene[0] = '\0';
1009  glp->pseudogene[0] = '\0';
1010  }
1011  return;
1012  }
1013 
1014  if (tglp->pseudogene.empty()) {
1015  if (glp->pseudogene[0] != '\0')
1016  tglp->pseudogene = glp->pseudogene;
1017  else {
1018  tglp->pseudogene.resize(1);
1019  tglp->pseudogene[0] = '\0';
1020  }
1021  } else if (glp->pseudogene.empty()) {
1022  if (tglp->pseudogene[0] != '\0')
1023  glp->pseudogene = tglp->pseudogene;
1024  else {
1025  glp->pseudogene.resize(1);
1026  glp->pseudogene[0] = '\0';
1027  }
1028  }
1029 }
1030 
1031 /**********************************************************/
1033 {
1034  GeneListPtr glp;
1035  GeneListPtr tglp;
1036  GeneListPtr next;
1037  GeneListPtr prev;
1038  MixLocPtr mlp;
1039  MixLocPtr tmlp;
1040  Int4 segnum;
1041  Int4 i;
1042  ENa_strand strand;
1043 
1044  for (glp = gnp->glp; glp && glp->segnum == 1; glp = glp->next) {
1045  if (glp->loc || ! glp->mlp)
1046  continue;
1047  segnum = 1;
1048  strand = glp->slibp->strand;
1049  for (tglp = gnp->glp; tglp; tglp = tglp->next) {
1050  if (tglp->loc || ! glp->mlp ||
1051  fta_cmp_locusyn(glp, tglp) != 0)
1052  continue;
1053 
1054  i = tglp->segnum - segnum;
1055  if (i < 0 || i > 1)
1056  break;
1057 
1058  if (tglp->slibp->strand != strand)
1059  continue;
1060 
1061  segnum = tglp->segnum;
1062  }
1063  if (segnum != gnp->segindex)
1064  continue;
1065 
1066  segnum = 0;
1067  mlp = nullptr;
1068  for (tglp = gnp->glp; tglp; tglp = tglp->next) {
1069  if (tglp->loc || tglp->segnum - segnum != 1 ||
1070  ! tglp->mlp || fta_cmp_locusyn(glp, tglp) != 0)
1071  continue;
1072 
1073  if (tglp->slibp->strand != strand)
1074  continue;
1075  segnum++;
1076  tmlp = EasySeqLocMerge(mlp, tglp->mlp, false);
1077  MixLocFree(tglp->mlp);
1078  MixLocFree(mlp);
1079  mlp = tmlp;
1080 
1081  if (segnum != gnp->segindex) {
1082  tglp->mlp = nullptr;
1083  if (tglp->pseudo)
1084  glp->pseudo = true;
1085  if (tglp->allpseudo == false)
1086  glp->allpseudo = false;
1087  fta_check_pseudogene(tglp, glp);
1088  fta_add_wormbase(tglp, glp);
1089  fta_add_olt(tglp, glp);
1090  continue;
1091  }
1092  tglp->mlp = mlp;
1093  break;
1094  }
1095  }
1096  prev = nullptr;
1097  for (tglp = gnp->glp; tglp; tglp = next) {
1098  next = tglp->next;
1099  if (tglp->mlp) {
1100  prev = tglp;
1101  continue;
1102  }
1103  if (! prev)
1104  gnp->glp = tglp->next;
1105  else
1106  prev->next = tglp->next;
1107  tglp->next = nullptr;
1108  GeneListFree(tglp);
1109  }
1110 }
1111 
1112 /**********************************************************/
1113 static bool fta_check_feat_overlap(GeneLocsPtr gelop, GeneListPtr c, MixLocPtr mlp, Int4 from, Int4 to)
1114 {
1115  Int4 min;
1116  Int4 max;
1117 
1118  if (! gelop || ! c || ! mlp)
1119  return true;
1120 
1121  min = (mlp->min > from) ? from : mlp->min;
1122  max = (mlp->max < to) ? to : mlp->max;
1123 
1124  for (; gelop; gelop = gelop->next) {
1125  if (min > gelop->verymax) {
1126  gelop = nullptr;
1127  break;
1128  }
1129 
1130  if ((gelop->strand > -1 && c->slibp->strand != gelop->strand) ||
1131  max < gelop->verymin)
1132  continue;
1133 
1134  if (fta_strings_same(c->locus.c_str(), gelop->gene.c_str()) && fta_strings_same(c->locus_tag.c_str(), gelop->locus.c_str()))
1135  continue;
1136  auto it = gelop->ammp.begin();
1137  for (; it != gelop->ammp.end(); ++it) {
1138  auto ammp = *it;
1139  int ver1 = 0;
1140  string label1;
1141  ammp.pId->GetLabel(&label1, &ver1);
1142  string label2;
1143  int ver2 = 0;
1144  mlp->pId->GetLabel(&label2, &ver2);
1145  if (max < ammp.min || min > ammp.max || ver1 != ver2)
1146  continue;
1147  if (label1 == label2)
1148  break;
1149  }
1150  if (it != gelop->ammp.end()) {
1151  break;
1152  }
1153  }
1154 
1155  return gelop != nullptr;
1156 }
1157 
1158 /**********************************************************/
1159 static bool ConfirmCircular(MixLocPtr mlp)
1160 {
1161  MixLocPtr tmlp;
1162 
1163  if (! mlp || ! mlp->next)
1164  return false;
1165 
1166  tmlp = mlp;
1167  if (mlp->strand != eNa_strand_minus) {
1168  for (; tmlp->next; tmlp = tmlp->next)
1169  if (tmlp->min > tmlp->next->min)
1170  break;
1171  } else {
1172  for (; tmlp->next; tmlp = tmlp->next)
1173  if (tmlp->min < tmlp->next->min)
1174  break;
1175  }
1176 
1177  if (tmlp->next)
1178  return true;
1179 
1180  for (tmlp = mlp; tmlp; tmlp = tmlp->next) {
1181  tmlp->numint = 0;
1182  }
1183  return false;
1184 }
1185 
1186 /**********************************************************/
1187 static void FixMixLoc(GeneListPtr c, GeneLocsPtr gelop)
1188 {
1189  Int4 from;
1190  Int4 to;
1191  MixLocPtr mlp;
1192  MixLocPtr tmlp;
1193  ENa_strand strand;
1194 
1195  bool noleft;
1196  bool noright;
1197  bool tempcirc;
1198 
1199  c->mlp = nullptr;
1200 
1201  if (c->feat.Empty() || ! c->feat->IsSetLocation()) {
1202  CRef<CSeq_id> pTempId;
1203  for (auto pId : c->slibp->ids) {
1204  if (pId) {
1205  pTempId = pId;
1206  break;
1207  }
1208  }
1209 
1210  if (! pTempId) {
1211  return;
1212  }
1213 
1214  mlp = new MixLoc();
1215  mlp->pId = pTempId;
1216  mlp->min = c->slibp->from;
1217  mlp->max = c->slibp->to;
1218  mlp->strand = c->slibp->strand;
1219  mlp->noleft = c->slibp->noleft;
1220  mlp->noright = c->slibp->noright;
1221  mlp->numint = 0;
1222  c->mlp = mlp;
1223  return;
1224  }
1225 
1226  if (c->leave == 1) {
1227  c->loc.Reset(&c->feat->SetLocation());
1228  return;
1229  }
1230 
1231  mlp = nullptr;
1232  Int4 i = 1;
1233  const CSeq_loc& locs = c->feat->GetLocation();
1234  for (CSeq_loc::const_iterator loc = locs.begin(); loc != locs.end(); ++loc) {
1235  noleft = false;
1236  noright = false;
1237  CRef<CSeq_id> pId;
1238 
1239  CConstRef<CSeq_loc> cur_loc = loc.GetRangeAsSeq_loc();
1240  if (cur_loc->IsInt()) {
1241  const CSeq_interval& interval = cur_loc->GetInt();
1242  if (interval.IsSetId()) {
1243  pId = Ref(new CSeq_id());
1244  pId->Assign(interval.GetId());
1245  }
1246 
1247  from = interval.IsSetFrom() ? interval.GetFrom() : 0;
1248  to = interval.IsSetTo() ? interval.GetTo() : 0;
1249  strand = cur_loc->IsSetStrand() ? cur_loc->GetStrand() : eNa_strand_unknown;
1250 
1251  if (interval.IsSetFuzz_from() && interval.GetFuzz_from().IsLim() && interval.GetFuzz_from().GetLim() == CInt_fuzz::eLim_lt)
1252  noleft = true;
1253 
1254  if (interval.IsSetFuzz_to() && interval.GetFuzz_to().IsLim() && interval.GetFuzz_to().GetLim() == CInt_fuzz::eLim_gt)
1255  noright = true;
1256  } else if (cur_loc->IsPnt()) {
1257  const CSeq_point& point = cur_loc->GetPnt();
1258  if (point.IsSetId()) {
1259  pId = Ref(new CSeq_id());
1260  pId->Assign(point.GetId());
1261  }
1262 
1263  from = point.IsSetPoint() ? point.GetPoint() : 0;
1264  to = from;
1265  strand = cur_loc->IsSetStrand() ? cur_loc->GetStrand() : eNa_strand_unknown;
1266 
1267  if (point.IsSetFuzz() && point.GetFuzz().IsLim()) {
1268  if (point.GetFuzz().GetLim() == CInt_fuzz::eLim_gt)
1269  noright = true;
1270  else if (point.GetFuzz().GetLim() == CInt_fuzz::eLim_lt)
1271  noleft = true;
1272  }
1273  } else
1274  continue;
1275 
1276  if (! pId || from < 0 || to < 0) {
1277  continue;
1278  }
1279 
1280 
1281  if (! mlp) {
1282  mlp = new MixLoc();
1283  mlp->pId = pId;
1284  mlp->min = from;
1285  mlp->max = to;
1286  mlp->strand = strand;
1287  mlp->noleft = noleft;
1288  mlp->noright = noright;
1289  mlp->numint = i++;
1290  continue;
1291  }
1292 
1293  for (tmlp = mlp;; tmlp = tmlp->next) {
1294  tempcirc = false;
1295  if (s_IdsMatch(pId, tmlp->pId) && tmlp->strand == strand) {
1296  if (tempcirc == false && ((tmlp->min <= to && tmlp->max >= from) ||
1297  fta_check_feat_overlap(gelop, c, tmlp, from, to) == false)) {
1298  if (tmlp->min > from) {
1299  tmlp->min = from;
1300  tmlp->noleft = noleft;
1301  }
1302  if (tmlp->max < to) {
1303  tmlp->max = to;
1304  tmlp->noright = noright;
1305  }
1306  break;
1307  }
1308  }
1309 
1310  if (tmlp->next)
1311  continue;
1312 
1313  tmlp->next = new MixLoc();
1314  tmlp = tmlp->next;
1315  tmlp->pId = pId;
1316  tmlp->min = from;
1317  tmlp->max = to;
1318  tmlp->strand = strand;
1319  tmlp->noleft = noleft;
1320  tmlp->noright = noright;
1321  tmlp->numint = i++;
1322  break;
1323  }
1324  }
1325 
1326  c->mlp = mlp;
1327 }
1328 
1329 
1330 /**********************************************************/
1331 static void fta_make_seq_int(MixLocPtr mlp, bool noleft, bool noright, CSeq_interval& interval)
1332 {
1333  if (mlp->strand != eNa_strand_unknown)
1334  interval.SetStrand(mlp->strand);
1335 
1336  interval.SetFrom(mlp->min);
1337  interval.SetTo(mlp->max);
1338 
1339  interval.SetId(*(mlp->pId));
1340 
1341  if (mlp->noleft || noleft) {
1342  interval.SetFuzz_from().SetLim(CInt_fuzz::eLim_lt);
1343  }
1344 
1345  if (mlp->noright || noright) {
1346  interval.SetFuzz_to().SetLim(CInt_fuzz::eLim_gt);
1347  }
1348 }
1349 
1350 /**********************************************************/
1351 static void fta_make_seq_pnt(MixLocPtr mlp, bool noleft, bool noright, CSeq_point& point)
1352 {
1353  if (mlp->strand != eNa_strand_unknown)
1354  point.SetStrand(mlp->strand);
1355  point.SetPoint(mlp->min);
1356 
1357  point.SetId(*(mlp->pId));
1358 
1359  if (mlp->noleft || mlp->noright || noleft || noright) {
1360  CInt_fuzz::TLim lim = (mlp->noleft == false && noleft == false) ? CInt_fuzz::eLim_gt : CInt_fuzz::eLim_lt;
1361  point.SetFuzz().SetLim(lim);
1362  }
1363 }
1364 
1365 /**********************************************************/
1366 static CRef<CSeq_loc> MakeCLoc(MixLocPtr mlp, bool noleft, bool noright)
1367 {
1368  CRef<CSeq_loc> ret(new CSeq_loc);
1369 
1370  if (! mlp->next) {
1371  if (mlp->min == mlp->max)
1372  fta_make_seq_pnt(mlp, noleft, noright, ret->SetPnt());
1373  else
1374  fta_make_seq_int(mlp, noleft, noright, ret->SetInt());
1375  return ret;
1376  }
1377 
1378  CRef<CSeq_loc> cur;
1379  CSeq_loc_mix& mix = ret->SetMix();
1380 
1381  for (; mlp; mlp = mlp->next) {
1382  cur.Reset(new CSeq_loc);
1383 
1384  if (mlp->min == mlp->max) {
1385  fta_make_seq_pnt(mlp, noleft, noright, cur->SetPnt());
1386  } else {
1387  fta_make_seq_int(mlp, false, false, cur->SetInt());
1388  }
1389 
1390  mix.AddSeqLoc(*cur);
1391  }
1392 
1393  return ret;
1394 }
1395 
1396 /**********************************************************/
1398 {
1399  MixLocPtr mlp;
1400  MixLocPtr tmlp;
1401  Int2 count;
1402 
1403  count = 0;
1404  for (mlp = second; mlp; mlp = mlp->next) {
1405  if (! mlp->pId)
1406  continue;
1407  for (tmlp = second; tmlp < mlp; tmlp = tmlp->next)
1408  if (tmlp->pId && s_IdsMatch(tmlp->pId, mlp->pId))
1409  break;
1410  if (tmlp < mlp)
1411  continue;
1412  count++;
1413  }
1414  for (mlp = first; mlp; mlp = mlp->next) {
1415  if (! mlp->pId)
1416  continue;
1417  for (tmlp = first; tmlp < mlp; tmlp = tmlp->next)
1418  if (tmlp->pId && s_IdsMatch(tmlp->pId, mlp->pId))
1419  break;
1420  if (tmlp < mlp)
1421  continue;
1422  count--;
1423  }
1424  return (count);
1425 }
1426 
1427 /**********************************************************/
1429 {
1430  MixLocPtr mlp;
1431  MixLocPtr tmlp;
1432  MixLocPtr mlpprev;
1433  MixLocPtr mlpnext;
1434 
1435  if (! c->mlp || ! c->mlp->next)
1436  return;
1437 
1438  for (bool got = true; got == true;) {
1439  got = false;
1440  for (mlp = c->mlp; mlp; mlp = mlp->next) {
1441  if (mlp->numint == -1)
1442  continue;
1443  for (tmlp = mlp->next; tmlp; tmlp = tmlp->next) {
1444  if (tmlp->numint == -1 || mlp->strand != tmlp->strand)
1445  continue;
1446 
1447  if (mlp->numint == 0 && tmlp->numint == 0) {
1448  if ((tmlp->min >= mlp->min && tmlp->min <= mlp->max) ||
1449  (tmlp->max >= mlp->min && tmlp->max <= mlp->max) ||
1450  (mlp->min > tmlp->min && mlp->max < tmlp->max)) {
1451  if (mlp->min == tmlp->min) {
1452  if (tmlp->noleft)
1453  mlp->noleft = true;
1454  } else if (mlp->min > tmlp->min) {
1455  mlp->min = tmlp->min;
1456  mlp->noleft = tmlp->noleft;
1457  }
1458  if (mlp->max == tmlp->max) {
1459  if (tmlp->noright)
1460  mlp->noright = true;
1461  } else if (mlp->max < tmlp->max) {
1462  mlp->max = tmlp->max;
1463  mlp->noright = tmlp->noright;
1464  }
1465  tmlp->numint = -1;
1466  got = true;
1467  }
1468  continue;
1469  }
1470 
1471  if (mlp->min != tmlp->min || mlp->max != tmlp->max)
1472  continue;
1473  if (tmlp->numint == 0) {
1474  if (tmlp->noleft)
1475  mlp->noleft = true;
1476  if (tmlp->noright)
1477  mlp->noright = true;
1478  tmlp->numint = -1;
1479  } else if (mlp->numint == 0) {
1480  if (mlp->noleft)
1481  tmlp->noleft = true;
1482  if (mlp->noright)
1483  tmlp->noright = true;
1484  mlp->numint = -1;
1485  } else if (tmlp->numint >= mlp->numint) {
1486  if (tmlp->noleft)
1487  mlp->noleft = true;
1488  if (tmlp->noright)
1489  mlp->noright = true;
1490  tmlp->numint = -1;
1491  } else {
1492  if (mlp->noleft)
1493  tmlp->noleft = true;
1494  if (mlp->noright)
1495  tmlp->noright = true;
1496  mlp->numint = -1;
1497  }
1498  got = true;
1499  }
1500  }
1501  }
1502 
1503  for (mlpprev = nullptr, mlp = c->mlp; mlp; mlp = mlpnext) {
1504  mlpnext = mlp->next;
1505  if (mlp->numint != -1) {
1506  mlpprev = mlp;
1507  continue;
1508  }
1509 
1510  if (! mlpprev)
1511  c->mlp = mlpnext;
1512  else
1513  mlpprev->next = mlpnext;
1514  mlp->next = nullptr;
1515  MixLocFree(mlp);
1516  }
1517 
1518  mlpprev = nullptr;
1519  for (mlp = c->mlp; mlp; mlpprev = mlp, mlp = mlp->next)
1520  if (mlp->numint == 1)
1521  break;
1522 
1523  if (mlp && mlp != c->mlp) {
1524  mlpprev->next = nullptr;
1525  for (tmlp = mlp; tmlp->next;)
1526  tmlp = tmlp->next;
1527  tmlp->next = c->mlp;
1528  c->mlp = mlp;
1529  }
1530 }
1531 
1532 /**********************************************************/
1533 static void SortMixLoc(GeneListPtr c)
1534 {
1535  MixLocPtr mlp;
1536  MixLocPtr tmlp;
1537 
1538  bool noleft;
1539  bool noright;
1540 
1541  Int4 min;
1542  Int4 max;
1543  Int4 numint;
1544 
1545  if (! c->mlp || ! c->mlp->next)
1546  return;
1547 
1548  for (mlp = c->mlp; mlp; mlp = mlp->next) {
1549  for (tmlp = mlp->next; tmlp; tmlp = tmlp->next) {
1550  if (! s_IdsMatch(mlp->pId, tmlp->pId) ||
1551  mlp->strand != tmlp->strand)
1552  break;
1553  if (mlp->strand == eNa_strand_minus) {
1554  if (tmlp->min < mlp->min)
1555  continue;
1556  if (tmlp->min == mlp->min) {
1557  if (tmlp->noleft == mlp->noleft) {
1558  if (tmlp->max < mlp->max)
1559  continue;
1560  if (tmlp->max == mlp->max) {
1561  if (tmlp->noright == mlp->noright || mlp->noright)
1562  continue;
1563  }
1564  } else if (mlp->noleft)
1565  continue;
1566  }
1567  } else {
1568  if (tmlp->min > mlp->min)
1569  continue;
1570  if (tmlp->min == mlp->min) {
1571  if (tmlp->noleft == mlp->noleft) {
1572  if (tmlp->max > mlp->max)
1573  continue;
1574  if (tmlp->max == mlp->max) {
1575  if (tmlp->noright == mlp->noright || tmlp->noright)
1576  continue;
1577  }
1578  } else if (tmlp->noleft)
1579  continue;
1580  }
1581  }
1582  min = mlp->min;
1583  max = mlp->max;
1584  noleft = mlp->noleft;
1585  noright = mlp->noright;
1586  numint = mlp->numint;
1587  mlp->min = tmlp->min;
1588  mlp->max = tmlp->max;
1589  mlp->noleft = tmlp->noleft;
1590  mlp->noright = tmlp->noright;
1591  mlp->numint = tmlp->numint;
1592  tmlp->min = min;
1593  tmlp->max = max;
1594  tmlp->noleft = noleft;
1595  tmlp->noright = noright;
1596  tmlp->numint = numint;
1597  }
1598  }
1599 }
1600 
1601 /**********************************************************/
// Merge the entries of gnp->glp that belong to the same gene and emit gene
// features for what remains. Visible stages, in order:
//   1. warn about entries of one segment that match by fta_cmp_locusyn()
//      and carry Cdregion data;
//   2. build the piece list (mlp) of every entry via FixMixLoc() and
//      recompute gnp->circular;
//   3. fold each entry into its predecessor when segment, locus/synonyms,
//      leave flag and strand allow it;
//   4. sort pieces, build c->loc from the pieces, drop entries whose
//      location is contained in / the same as another entry's location;
//   5. call AddGeneFeat() for every surviving entry.
// NOTE(review): `seqlen` is not referenced anywhere in the visible body, and
// `j` is only incremented, never read.
// NOTE(review): the first and third loops dereference gnp->glp without a
// null check — presumably the caller guarantees a non-empty list; confirm.
// NOTE(review): two statements of this function are absent from this
// listing (after `if (gnp->circular)` in stage 4 and the declaration of
// `cmp_res`); they are left exactly as found.
1602 static void ScannGeneName(GeneNodePtr gnp, Int4 seqlen)
1603 {
1604  GeneListPtr c;
1605  GeneListPtr cn;
1606  GeneListPtr cp;
1607 
1608  MixLocPtr mlp;
1609  Int4 j;
1610  Int2 level;
1611 
1612  bool join;
1613 
// Stage 1: pairwise check of entries within the same segment.
1614  for (c = gnp->glp; c->next; c = c->next) {
1615  for (cn = c->next; cn; cn = cn->next) {
// NOTE(review): the last IsCdregion() test below looks at c->feat a second
// time although the IsSetData() test beside it is on cn->feat; this looks
// like a copy-paste slip for cn->feat->GetData().IsCdregion() — confirm.
1616  if (c->segnum == cn->segnum &&
1617  c->feat.NotEmpty() && cn->feat.NotEmpty() &&
1618  c->feat->IsSetData() && c->feat->GetData().IsCdregion() &&
1619  cn->feat->IsSetData() && c->feat->GetData().IsCdregion() &&
1620  fta_cmp_locusyn(c, cn) == 0) {
1621  ErrPostEx(SEV_WARNING, ERR_GENEREF_NoUniqMaploc, "Two different cdregions for one gene %s\"%s\".", (c->locus.empty()) ? "with locus_tag " : "", (c->locus.empty()) ? c->locus_tag.c_str() : c->locus.c_str());
1622  }
1623  }
1624  }
1625 
// Stage 2: build piece lists; the node stays "circular" only if it was
// flagged so before and ConfirmCircular() agrees for at least one entry.
1626  bool circular = false;
1627  for (j = 1, c = gnp->glp; c; c = c->next, j++) {
1628 
1629  FixMixLoc(c, gnp->gelop);
1630  if (gnp->circular && ConfirmCircular(c->mlp))
1631  circular = true;
1632  }
1633 
1634  gnp->circular = circular;
1635 
1636 
// Stage 3: try to merge each entry `cn` into its predecessor `c`. On a
// successful merge `c` is kept as the current entry, so it is compared
// against its new successor next.
1637  for (c = gnp->glp; c->next;) {
1638  cn = c->next;
// No merge across segments, different genes, entries marked "leave",
// different strands, or (non-simple mode) when another gene lies between.
1639  if (c->segnum != cn->segnum || fta_cmp_locusyn(c, cn) != 0 ||
1640  c->leave != 0 || cn->leave != 0 ||
1641  c->slibp->strand != cn->slibp->strand ||
1642  (gnp->simple_genes == false && DoWeHaveGeneInBetween(c, cn->slibp, gnp))) {
1643  c = cn;
1644  continue;
1645  }
1646 
1647  if (gnp->skipdiv && gnp->clp && gnp->simple_genes == false &&
1648  DoWeHaveCdssInBetween(c, cn->slibp->from, gnp->clp)) {
1649  c = cn;
1650  continue;
1651  }
1652 
// Extend the overall extent of `c`; the partial flag follows the entry
// that supplied the new boundary.
1653  if (c->slibp->from > cn->slibp->from) {
1654  c->slibp->from = cn->slibp->from;
1655  c->slibp->noleft = cn->slibp->noleft;
1656  }
1657  if (c->slibp->to < cn->slibp->to) {
1658  c->slibp->to = cn->slibp->to;
1659  c->slibp->noright = cn->slibp->noright;
1660  }
1661 
// `join` becomes true when some other-gene entry (fta_cmp_locusyn != 0) of
// the same segment and strand, not "leave" and not circular, overlaps the
// extended extent of `c`.
1662  if (! gnp->simple_genes) {
1663  for (cp = gnp->glp; cp; cp = cp->next) {
1664  if (cp->segnum != c->segnum || cp->leave == 1 || cp->circular)
1665  continue;
1666  if (fta_cmp_locusyn(cp, c) == 0 ||
1667  cp->slibp->strand != c->slibp->strand)
1668  continue;
1669  if (c->slibp->from <= cp->slibp->to &&
1670  c->slibp->to >= cp->slibp->from)
1671  break;
1672  }
1673  join = (cp != nullptr);
1674  } else
1675  join = false;
1676 
// Combine the two piece lists; for non-circular records GetMergeOrder()
// decides which list goes first, and `c` takes over cn's feature when cn's
// list leads.
1677  if (! gnp->circular) {
1678  level = GetMergeOrder(c->mlp, cn->mlp);
1679  if (level > 0) {
1680  mlp = EasySeqLocMerge(cn->mlp, c->mlp, join);
1681  c->feat = cn->feat;
1682  } else
1683  mlp = EasySeqLocMerge(c->mlp, cn->mlp, join);
1684  } else
1685  mlp = CircularSeqLocCollect(c->mlp, cn->mlp);
1686 
1687  if (c->mlp)
1688  MixLocFree(c->mlp);
1689  c->mlp = mlp;
// Carry cn's attributes over to c, then unlink and free cn.
1690  if (cn->pseudo)
1691  c->pseudo = true;
1692  if (! cn->allpseudo)
1693  c->allpseudo = false;
1694  fta_check_pseudogene(cn, c);
1695  fta_add_wormbase(cn, c);
1696  fta_add_olt(cn, c);
1697  c->noleft = c->slibp->noleft;
1698  c->noright = c->slibp->noright;
1699  c->next = cn->next;
1700  cn->next = nullptr;
1701  GeneListFree(cn);
1702  }
1703 
// Stage 4: sort pieces of every entry.
1704  for (c = gnp->glp; c; c = c->next) {
1705  SortMixLoc(c);
// NOTE(review): the statement controlled by this `if` is missing from the
// listing (listing line 1707).
1706  if (gnp->circular)
1708  }
1709 
1710  if (gnp->seg)
1711  MessWithSegGenes(gnp);
1712 
// Entries that did not get a location earlier receive one built from
// their pieces.
1713  for (c = gnp->glp; c; c = c->next)
1714  if (c->loc.Empty() && c->mlp)
1715  c->loc = MakeCLoc(c->mlp, c->noleft, c->noright);
1716 
// Absorb entries of the same gene/segment/strand whose location compares
// as eContains or eSame: their flags are merged into `c` and their location
// is reset so that the next loop removes them.
1717  for (c = gnp->glp; c; c = c->next) {
1718  if (c->loc.Empty())
1719  continue;
1720 
1721  const CSeq_loc& loc = *c->loc;
1722  for (cn = gnp->glp; cn; cn = cn->next) {
1723  if (cn->loc.Empty() || &loc == cn->loc || cn->segnum != c->segnum ||
1724  cn->slibp->strand != c->slibp->strand ||
1725  fta_cmp_locusyn(cn, c) != 0)
1726  continue;
1727 
// NOTE(review): the declaration/initialisation of `cmp_res` is missing from
// the listing (listing line 1728).
1729  if (cmp_res != sequence::eContains && cmp_res != sequence::eSame)
1730  continue;
1731 
1732  if (cn->leave == 1 || c->leave == 0)
1733  c->leave = cn->leave;
1734  if (cn->pseudo)
1735  c->pseudo = true;
1736  if (! cn->allpseudo)
1737  c->allpseudo = false;
1738  fta_check_pseudogene(cn, c);
1739  fta_add_wormbase(cn, c);
1740  fta_add_olt(cn, c);
1741  if (cn->noleft)
1742  c->noleft = true;
1743  if (cn->noright)
1744  c->noright = true;
1745 
1746  cn->loc.Reset();
1747  }
1748  }
1749 
// Unlink and free every entry left without a location.
1750  for (cp = nullptr, c = gnp->glp; c; c = cn) {
1751  cn = c->next;
1752  if (! c->loc) {
1753  if (! cp)
1754  gnp->glp = cn;
1755  else
1756  cp->next = cn;
1757  c->next = nullptr;
1758  GeneListFree(c);
1759  } else
1760  cp = c;
1761  }
1762 
// Stage 5: emit gene features. A lone entry uses its own maploc; a run of
// consecutive entries of one gene shares the first non-empty maploc, with a
// warning if the run contains differing (case-insensitive) maplocs.
1763  for (c = gnp->glp; c; c = cn) {
1764  cn = c->next;
1765  if (! cn || fta_cmp_locusyn(c, cn) != 0) {
1766  AddGeneFeat(c, c->maploc, gnp->feats);
1767  continue;
1768  }
1769 
1770  string maploc;
1771  for (cn = c; cn; cn = cn->next) {
1772  if (fta_cmp_locusyn(c, cn) != 0)
1773  break;
1774  if (cn->maploc.empty())
1775  continue;
1776 
1777  if (maploc.empty()) {
1778  maploc = cn->maploc;
1779  } else if (! NStr::EqualNocase(maploc, cn->maploc)) {
1780  ErrPostEx(SEV_WARNING, ERR_GENEREF_NoUniqMaploc, "Different maplocs in the gene %s\"%s\".", (c->locus.empty()) ? "with locus_tag " : "", (c->locus.empty()) ? c->locus_tag.c_str() : c->locus.c_str());
1781  }
1782  }
// `cn` ends on the first entry of the next gene (or null), which is where
// the outer loop resumes.
1783  for (cn = c; cn; cn = cn->next) {
1784  if (fta_cmp_locusyn(c, cn) != 0)
1785  break;
1786  AddGeneFeat(cn, maploc, gnp->feats);
1787  }
1788  }
1789 }
1790 
1791 /**********************************************************/
1792 static CRef<CSeq_id> CpSeqIdAcOnly(const CSeq_id& id, bool accver)
1793 {
1794  auto new_id = Ref(new CSeq_id());
1795  new_id->Assign(id);
1796 
1797  if (! accver) {
1798  const CTextseq_id* pTextId = new_id->GetTextseq_Id();
1799  if (pTextId && pTextId->IsSetVersion()) {
1800  const_cast<CTextseq_id*>(pTextId)->ResetVersion();
1801  }
1802  }
1803 
1804  return new_id;
1805 }
1806 
1807 /**********************************************************/
1808 static bool WeDontNeedToJoinThis(const CSeqFeatData& data)
1809 {
1810  const Char** b;
1811  short* i;
1812 
1813  if (data.IsRna()) {
1814  const CRNA_ref& rna_ref = data.GetRna();
1815 
1816  i = nullptr;
1817  if (rna_ref.IsSetType()) {
1818  for (i = leave_rna_feat; *i != -1; i++) {
1819  if (rna_ref.GetType() == *i)
1820  break;
1821  }
1822  }
1823 
1824  if (i && *i != -1)
1825  return true;
1826 
1827  if (rna_ref.GetType() == CRNA_ref::eType_other && rna_ref.IsSetExt() && rna_ref.GetExt().IsName() &&
1828  rna_ref.GetExt().GetName() == "ncRNA")
1829  return true;
1830  } else if (data.IsImp()) {
1831  b = nullptr;
1832  if (data.GetImp().IsSetKey()) {
1833  for (b = leave_imp_feat; *b; b++) {
1834  if (data.GetImp().GetKey() == *b)
1835  break;
1836  }
1837  }
1838  if (b && *b)
1839  return true;
1840  }
1841  return false;
1842 }
1843 
1844 /**********************************************************/
1845 static void GetGeneSyns(const TQualVector& quals, const char* name, TSynSet& syns)
1846 {
1847  if (! name)
1848  return;
1849 
1850  for (const auto& qual : quals) {
1851  if (! qual->IsSetQual() || ! qual->IsSetVal() ||
1852  qual->GetQual() != "gene" ||
1853  NStr::EqualNocase(qual->GetVal(), name))
1854  continue;
1855 
1856  syns.insert(qual->GetVal());
1857  }
1858 
1859  for (const auto& qual : quals) {
1860  if (! qual->IsSetQual() || ! qual->IsSetVal() ||
1861  qual->GetQual() != "gene_synonym" ||
1862  NStr::EqualNocase(qual->GetVal(), name))
1863  continue;
1864 
1865  syns.insert(qual->GetVal());
1866  }
1867 }
1868 
1869 /**********************************************************/
1870 static bool fta_rnas_cds_feat(const CSeq_feat& feat)
1871 {
1872  if (! feat.IsSetData())
1873  return false;
1874 
1875  if (feat.GetData().IsImp()) {
1876  if (feat.GetData().GetImp().IsSetKey()) {
1877  const string& key = feat.GetData().GetImp().GetKey();
1878  if (key == "CDS" || key == "rRNA" ||
1879  key == "tRNA" || key == "mRNA")
1880  return true;
1881  }
1882  return false;
1883  }
1884 
1885  if (feat.GetData().IsCdregion())
1886  return true;
1887 
1888  if (! feat.GetData().IsRna())
1889  return false;
1890 
1891  const CRNA_ref& rna_ref = feat.GetData().GetRna();
1892  if (rna_ref.IsSetType() && rna_ref.GetType() > 1 && rna_ref.GetType() < 5) /* mRNA, tRNA or rRNA */
1893  return true;
1894 
1895  return false;
1896 }
1897 
1898 /**********************************************************/
1899 static bool IfCDSGeneFeat(const CSeq_feat& feat, Uint1 choice, const char* key)
1900 {
1901  if (feat.IsSetData() && feat.GetData().Which() == choice)
1902  return true;
1903 
1904  if (feat.GetData().IsImp()) {
1905  if (feat.GetData().GetImp().IsSetKey() && feat.GetData().GetImp().GetKey() == key)
1906  return true;
1907  }
1908 
1909  return false;
1910 }
1911 
1912 /**********************************************************/
1913 static bool GetFeatNameAndLoc(GeneListPtr glp, const CSeq_feat& feat, GeneNodePtr gnp)
1914 {
1915  const char* p;
1916 
1917  bool ret = false;
1918 
1919  p = nullptr;
1920  if (feat.IsSetData()) {
1921  if (feat.GetData().IsImp()) {
1922  p = feat.GetData().GetImp().IsSetKey() ? feat.GetData().GetImp().GetKey().c_str() : nullptr;
1923  } else if (feat.GetData().IsCdregion())
1924  p = "CDS";
1925  else if (feat.GetData().IsGene())
1926  p = "gene";
1927  else if (feat.GetData().IsBiosrc())
1928  p = "source";
1929  else if (feat.GetData().IsRna()) {
1930  const CRNA_ref& rna_ref = feat.GetData().GetRna();
1931 
1932  if (rna_ref.IsSetType()) {
1933  switch (rna_ref.GetType()) {
1934  case 1:
1935  p = "precursor_RNA";
1936  break;
1937  case 2:
1938  p = "mRNA";
1939  break;
1940  case 3:
1941  p = "tRNA";
1942  break;
1943  case 4:
1944  p = "rRNA";
1945  break;
1946  case 5:
1947  p = "snRNA";
1948  break;
1949  case 6:
1950  p = "scRNA";
1951  break;
1952  case 7:
1953  p = "snoRNA";
1954  break;
1955  case 255:
1956  p = "misc_RNA";
1957  break;
1958  default:
1959  p = "an RNA";
1960  }
1961  } else
1962  p = "an RNA";
1963  }
1964  }
1965 
1966  if (! p)
1967  p = "a";
1968 
1969  ret = (MatchArrayString(feat_no_gene, p) < 0);
1970 
1971  if (! glp)
1972  return (ret);
1973 
1974  if (StringEqu(p, "misc_feature"))
1975  gnp->got_misc = true;
1976 
1977  glp->fname = p;
1978 
1979  if (! feat.IsSetLocation()) {
1980  glp->location = "Unknown";
1981  return (ret);
1982  }
1983 
1984  string loc_str;
1985  GetLocationStr(feat.GetLocation(), loc_str);
1986  if (loc_str.empty())
1987  glp->location = "Unknown";
1988  else {
1989  if (loc_str.size() > 55) {
1990  loc_str = loc_str.substr(0, 50);
1991  loc_str += " ...";
1992  }
1993  glp->location = loc_str;
1994  }
1995 
1996  return (ret);
1997 }
1998 
1999 /**********************************************************/
2000 static list<AccMinMax> fta_get_acc_minmax_strand(const CSeq_loc* location,
2001  GeneLocsPtr gelop)
2002 {
2003  list<AccMinMax> ammps;
2004  Int4 from;
2005  Int4 to;
2006 
2007  gelop->strand = -2;
2008 
2009  if (location) {
2010  for (CSeq_loc_CI loc(*location); loc; ++loc) {
2011  CConstRef<CSeq_loc> cur_loc = loc.GetRangeAsSeq_loc();
2012  CRef<CSeq_id> pId;
2013  if (cur_loc->IsInt()) {
2014  const CSeq_interval& interval = cur_loc->GetInt();
2015  if (interval.IsSetId()) {
2016  pId = Ref(new CSeq_id());
2017  pId->Assign(interval.GetId());
2018  }
2019  from = interval.GetFrom();
2020  to = interval.GetTo();
2021 
2022  ENa_strand strand = interval.IsSetStrand() ? interval.GetStrand() : eNa_strand_unknown;
2023  if (gelop->strand == -2)
2024  gelop->strand = strand;
2025  else if (gelop->strand != strand)
2026  gelop->strand = -1;
2027  } else if (cur_loc->IsPnt()) {
2028  const CSeq_point& point = cur_loc->GetPnt();
2029  if (point.IsSetId()) {
2030  pId = Ref(new CSeq_id());
2031  pId->Assign(point.GetId());
2032  }
2033  from = point.GetPoint();
2034  to = from;
2035 
2036  ENa_strand strand = point.IsSetStrand() ? point.GetStrand() : eNa_strand_unknown;
2037  if (gelop->strand == -2)
2038  gelop->strand = strand;
2039  else if (gelop->strand != strand)
2040  gelop->strand = -1;
2041  } else {
2042  continue;
2043  }
2044 
2045  _ASSERT(pId);
2046 
2047  if (gelop->verymin > from)
2048  gelop->verymin = from;
2049  if (gelop->verymax < to)
2050  gelop->verymax = to;
2051 
2052  bool found_id = false;
2053  auto it = ammps.begin();
2054  while (! found_id && it != ammps.end()) {
2055  auto& ammp = *it;
2056  if (s_IdsMatch(ammp.pId, pId)) {
2057  if (from < ammp.min) {
2058  ammp.min = from;
2059  }
2060  if (to > ammp.max) {
2061  ammp.max = to;
2062  }
2063  found_id = true;
2064  }
2065  ++it;
2066  }
2067 
2068  if (! found_id) {
2069  AccMinMax ammp;
2070  ammp.pId = pId;
2071  ammp.min = from;
2072  ammp.max = to;
2073  ammps.push_back(ammp);
2074  }
2075  }
2076  }
2077  return ammps;
2078 }
2079 
2080 /**********************************************************/
2081 static void fta_append_feat_list(GeneNodePtr gnp, const CSeq_loc* location, const char* gene, const char* locus_tag)
2082 {
2083  if (! gnp || ! location)
2084  return;
2085 
2086  GeneLocsPtr gelop = new GeneLocs();
2087  if (gene) {
2088  gelop->gene = gene;
2089  } else {
2090  gelop->gene.clear();
2091  }
2092  if (locus_tag) {
2093  gelop->locus = locus_tag;
2094  } else {
2095  gelop->locus.clear();
2096  }
2097 
2098  gelop->verymin = -1;
2099  gelop->verymax = -1;
2100  gelop->ammp = fta_get_acc_minmax_strand(location, gelop);
2101  gelop->next = gnp->gelop;
2102  gnp->gelop = gelop;
2103 }
2104 
2105 /**********************************************************/
2106 static bool CompareGeneLocsMinMax(const GeneLocsPtr& sp1, const GeneLocsPtr& sp2)
2107 {
2108  Int4 status = sp2->verymax - sp1->verymax;
2109  if (status == 0)
2110  status = sp1->verymin - sp2->verymin;
2111 
2112  return status < 0;
2113 }
2114 
2115 /**********************************************************/
2117 {
2118  Int4 index;
2119  Int4 total;
2120 
2121  GeneLocsPtr glp;
2122 
2123  total = 0;
2124  for (glp = gelop; glp; glp = glp->next)
2125  total++;
2126 
2127  vector<GeneLocsPtr> temp(total);
2128 
2129  for (index = 0, glp = gelop; glp; glp = glp->next)
2130  temp[index++] = glp;
2131 
2132  std::sort(temp.begin(), temp.end(), CompareGeneLocsMinMax);
2133 
2134  gelop = glp = temp[0];
2135  for (index = 0; index < total - 1; glp = glp->next, index++)
2136  glp->next = temp[index + 1];
2137 
2138  glp = temp[total - 1];
2139  glp->next = nullptr;
2140 
2141  return (gelop);
2142 }
2143 
2144 /**********************************************************/
2146 {
2147  if (! glp || ! feat.IsSetDbxref())
2148  return;
2149 
2150  CSeq_feat::TDbxref dbxrefs;
2151  for (CSeq_feat::TDbxref::iterator dbxref = feat.SetDbxref().begin(); dbxref != feat.SetDbxref().end(); ++dbxref) {
2152  if (! (*dbxref)->IsSetTag() || ! (*dbxref)->IsSetDb() ||
2153  (*dbxref)->GetDb() != "WormBase" ||
2154  ! StringEquN((*dbxref)->GetTag().GetStr().c_str(), "WBGene", 6)) {
2155  dbxrefs.push_back(*dbxref);
2156  continue;
2157  }
2158 
2159  glp->wormbase.insert((*dbxref)->GetTag().GetStr());
2160  }
2161 
2162  if (dbxrefs.empty())
2163  feat.ResetDbxref();
2164  else
2165  feat.SetDbxref().swap(dbxrefs);
2166 }
2167 
2168 /**********************************************************/
2169 static void fta_collect_olts(GeneListPtr glp, CSeq_feat& feat)
2170 {
2171  if (! glp || ! feat.IsSetQual())
2172  return;
2173 
2174  TQualVector quals;
2175  for (TQualVector::iterator qual = feat.SetQual().begin(); qual != feat.SetQual().end(); ++qual) {
2176  if (! (*qual)->IsSetQual() || ! (*qual)->IsSetVal() ||
2177  (*qual)->GetQual() != "old_locus_tag") {
2178  quals.push_back(*qual);
2179  continue;
2180  }
2181 
2182  glp->olt.insert((*qual)->GetVal());
2183  }
2184 
2185  if (quals.empty())
2186  feat.ResetQual();
2187  else
2188  feat.SetQual().swap(quals);
2189 }
2190 
2191 /**********************************************************
2192  *
2193  * SrchGene:
2194  * -- add new gene qual information into "glp"
2195  *
2196  **********************************************************/
2197 static void SrchGene(CSeq_annot::C_Data::TFtable& feats, GeneNodePtr gnp, Int4 length, const CSeq_id& id)
2198 {
2199  GeneList* newglp;
2200 
2201  if (! gnp)
2202  return;
2203 
2204  for (auto& feat : feats) {
2205  const string gene = CpTheQualValue(feat->GetQual(), "gene");
2206  const string locus_tag = CpTheQualValue(feat->GetQual(), "locus_tag");
2207 
2208  const CSeq_loc* cur_loc = feat->IsSetLocation() ? &feat->GetLocation() : nullptr;
2209  if (gene.empty() && locus_tag.empty()) {
2210  if (GetFeatNameAndLoc(nullptr, *feat, gnp))
2211  fta_append_feat_list(gnp, cur_loc, nullptr, nullptr);
2212  continue;
2213  }
2214 
2215  const string pseudogene = CpTheQualValue(feat->GetQual(), "pseudogene");
2216 
2217  newglp = new GeneList;
2218  newglp->locus = gene;
2219  newglp->locus_tag = locus_tag;
2220  newglp->pseudogene = pseudogene;
2221 
2222  fta_collect_wormbases(newglp, *feat);
2223  fta_collect_olts(newglp, *feat);
2224  if (GetFeatNameAndLoc(newglp, *feat, gnp))
2225  fta_append_feat_list(gnp, cur_loc, gene.c_str(), locus_tag.c_str());
2226 
2227  newglp->feat.Reset();
2228  if (gnp->simple_genes == false && cur_loc && cur_loc->IsMix()) {
2229  newglp->feat.Reset(new CSeq_feat);
2230  newglp->feat->Assign(*feat);
2231  }
2232 
2233  newglp->slibp = GetLowHighFromSeqLoc(cur_loc, length, id);
2234  if (! newglp->slibp) {
2235  delete newglp;
2236  continue;
2237  }
2238  if (gnp->simple_genes == false && feat->IsSetData() &&
2239  WeDontNeedToJoinThis(feat->GetData()))
2240  newglp->leave = 1;
2241 
2242  newglp->genefeat = IfCDSGeneFeat(*feat, CSeqFeatData::e_Gene, "gene");
2243 
2244  // newglp->maploc = (feat->IsSetQual() ? GetTheQualValue(feat->SetQual(), "map") : nullptr);
2245  if (feat->IsSetQual()) {
2246  auto qual = GetTheQualValue(feat->SetQual(), "map");
2247  if (qual) {
2248  newglp->maploc = qual;
2249  }
2250  }
2251  newglp->segnum = gnp->segindex;
2252 
2253  GetGeneSyns(feat->GetQual(), gene.c_str(), newglp->syn);
2254 
2255  newglp->loc.Reset();
2256  if (cur_loc) {
2257  newglp->loc.Reset(new CSeq_loc);
2258  newglp->loc->Assign(*cur_loc);
2259  }
2260 
2261  newglp->todel = false;
2262  if (IfCDSGeneFeat(*feat, CSeqFeatData::e_Cdregion, "CDS") == false && newglp->genefeat == false)
2263  newglp->pseudo = false;
2264  else
2265  newglp->pseudo = feat->IsSetPseudo() ? feat->GetPseudo() : false;
2266 
2267  newglp->allpseudo = feat->IsSetPseudo() ? feat->GetPseudo() : false;
2268 
2269  if (fta_rnas_cds_feat(*feat)) {
2270  newglp->noleft = newglp->slibp->noleft;
2271  newglp->noright = newglp->slibp->noright;
2272  } else {
2273  newglp->noleft = false;
2274  newglp->noright = false;
2275  }
2276 
2277  newglp->next = gnp->glp;
2278  gnp->glp = newglp;
2279  }
2280 
2281  if (gnp->gelop)
2282  gnp->gelop = fta_sort_feat_list(gnp->gelop);
2283 }
2284 
2285 /**********************************************************/
2287 {
2288  CdssListPtr newclp;
2289  SeqlocInfoblkPtr slip;
2290 
2291  for (const auto& feat : feats) {
2292  if (IfCDSGeneFeat(*feat, CSeqFeatData::e_Cdregion, "CDS") == false)
2293  continue;
2294 
2295  const CSeq_loc* cur_loc = feat->IsSetLocation() ? &feat->GetLocation() : nullptr;
2296 
2297  slip = GetLowHighFromSeqLoc(cur_loc, -99, id);
2298  if (! slip)
2299  continue;
2300 
2301  newclp = new CdssList();
2302  newclp->segnum = segnum;
2303  newclp->from = slip->from;
2304  newclp->to = slip->to;
2305  delete slip;
2306 
2307  newclp->next = clp;
2308  clp = newclp;
2309  }
2310 
2311  return (clp);
2312 }
2313 
2314 
2315 /**********************************************************
2316  *
2317  * FindGene:
2318  * -- there is no accession number if it is a segmented
2319  * set entry.
2320  *
2321  **********************************************************/
// Collect gene information from one Bioseq into `gene_node`.
// Returns without doing anything when the Bioseq has no ids, when
// IsSegBioseq() is true for its first id, or when it has no annotations.
// Otherwise the first feature-table annotation is processed: segindex is
// incremented, SrchGene() gathers the gene entries, SrchCdss() gathers CDS
// extents when skipdiv is set, and the Bioseq is remembered in
// gene_node->bioseq the first time gene entries exist.
2322 static void FindGene(CBioseq& bioseq, GeneNodePtr gene_node)
2323 {
2324  const CSeq_id* first_id = nullptr;
2325  if (! bioseq.GetId().empty())
2326  first_id = *bioseq.GetId().begin();
2327 
2328  if (! first_id) {
2329  return;
2330  }
2331 
// Bioseqs for which IsSegBioseq() holds are skipped here.
2332  if (IsSegBioseq(*first_id))
2333  return; /* process this bioseq */
2334 
// NOTE(review): the condition guarding the next assignment (listing line
// 2335) is missing from this listing.
2336  gene_node->circular = true;
2337 
2338  if (! bioseq.IsSetAnnot())
2339  return;
2340 
2341  for (auto& annot : bioseq.SetAnnot()) {
2342  if (! annot->IsFtable())
2343  continue;
2344 
// Copy of the first id; CpSeqIdAcOnly() is given gene_node->accver.
2345  CRef<CSeq_id> id = CpSeqIdAcOnly(*first_id, gene_node->accver);
2346 
2347  ++(gene_node->segindex); /* > 1, if segment set */
2348 
2349  SrchGene(annot->SetData().SetFtable(), gene_node, bioseq.GetLength(), *id);
2350 
2351  if (gene_node->skipdiv) {
2352  gene_node->clp = SrchCdss(annot->SetData().SetFtable(), gene_node->clp, gene_node->segindex, *id);
2353  }
2354 
2355  if (gene_node->glp && gene_node->flag == false) {
2356  /* the seqentry is not a member of segment seqentry
2357  */
2358  gene_node->bioseq = &bioseq;
2359  gene_node->flag = true;
2360  }
2361 
// Only the first feature table of the Bioseq is examined.
2362  break;
2363  }
2364 }
2365 
2366 /**********************************************************/
2367 static void GeneCheckForStrands(const GeneListPtr _glp)
2368 {
2369  GeneListPtr glp(_glp);
2370  GeneListPtr tglp;
2371 
2372  if (! glp)
2373  return;
2374 
2375  while (glp) {
2376  if (glp->locus.empty() && glp->locus_tag.empty())
2377  continue;
2378  bool got = false;
2379  for (tglp = glp->next; tglp; tglp = tglp->next) {
2380  if (tglp->locus.empty() && tglp->locus_tag.empty())
2381  continue;
2382  if (fta_cmp_locusyn(glp, tglp) != 0)
2383  break;
2384  if (! got && glp->slibp && tglp->slibp &&
2385  glp->slibp->strand != tglp->slibp->strand)
2386  got = true;
2387  }
2388  if (got) {
2389  ErrPostEx(SEV_WARNING, ERR_GENEREF_BothStrands, "Gene name %s\"%s\" has been used for features on both strands.", (glp->locus.empty()) ? "with locus_tag " : "", (glp->locus.empty()) ? glp->locus_tag.c_str() : glp->locus.c_str());
2390  }
2391  glp = tglp;
2392  }
2393 }
2394 
2395 /**********************************************************/
2396 static bool LocusTagCheck(GeneListPtr glp, bool& resort)
2397 {
2398  GeneListPtr tglp;
2399  GeneListPtr glpstart;
2400  GeneListPtr glpstop;
2401  bool same_gn;
2402  bool same_lt;
2403  bool ret;
2404 
2405  resort = false;
2406  if (! glp || ! glp->next)
2407  return true;
2408 
2409  glpstop = nullptr;
2410  for (ret = true; glp; glp = glpstop->next) {
2411  if (glp->locus.empty() && glp->locus_tag.empty())
2412  continue;
2413 
2414  glpstart = glp;
2415  glpstop = glp;
2416  for (tglp = glp->next; tglp; tglp = tglp->next) {
2417  if (NStr::EqualNocase(glp->locus, tglp->locus) == false ||
2418  NStr::EqualNocase(glp->locus_tag, tglp->locus_tag) == false)
2419  break;
2420  glpstop = tglp;
2421  }
2422 
2423  for (tglp = glpstop->next; tglp; tglp = tglp->next) {
2424  if (tglp->locus.empty() && tglp->locus_tag.empty())
2425  continue;
2426 
2427  same_gn = NStr::EqualNocase(glpstart->locus, tglp->locus);
2428  same_lt = NStr::EqualNocase(glpstart->locus_tag, tglp->locus_tag);
2429 
2430  if ((same_gn == false && same_lt == false) || (same_gn && same_lt) ||
2431  same_gn || glpstart->locus_tag.empty())
2432  continue;
2433 
2434  for (glp = glpstart;; glp = glp->next) {
2435  ErrPostEx(SEV_REJECT, ERR_FEATURE_InconsistentLocusTagAndGene, "Inconsistent pairs /gene+/locus_tag are encountered: \"%s\"+\"%s\" : %s feature at %s : \"%s\"+\"%s\" : %s feature at %s. Entry dropped.", (glp->locus.empty()) ? "(NULL)" : glp->locus.c_str(), (glp->locus_tag.empty()) ? "(NULL)" : glp->locus_tag.c_str(), glp->fname.c_str(), glp->location.c_str(), (tglp->locus.empty()) ? "(NULL)" : tglp->locus.c_str(), (tglp->locus_tag.empty()) ? "(NULL)" : tglp->locus_tag.c_str(), tglp->fname.c_str(), tglp->location.c_str());
2436  if (glp == glpstop)
2437  break;
2438  }
2439  ret = false;
2440  }
2441 
2442  if (! glpstart->locus.empty() && ! glpstart->locus_tag.empty() &&
2443  NStr::EqualCase(glpstart->locus.c_str(), glpstart->locus_tag.c_str())) {
2444  for (glp = glpstart;; glp = glp->next) {
2445  glp->locus.clear();
2446  resort = true;
2447  if (glp == glpstop)
2448  break;
2449  }
2450  }
2451  }
2452 
2453  return (ret);
2454 }
2455 
2456 /**********************************************************/
2458 {
2459  GeneListPtr glp;
2460  GeneListPtr tglp;
2461 
2462  if (! gnp || ! gnp->glp)
2463  return;
2464 
2465  for (glp = gnp->glp; glp; glp = glp->next) {
2466  if (glp->locus_tag.empty() || ! glp->locus.empty() ||
2467  (glp->fname != "misc_feature"))
2468  continue;
2469 
2470  for (tglp = gnp->glp; tglp; tglp = tglp->next) {
2471  if (tglp->fname.empty() ||
2472  (tglp->fname == "misc_feature")) { // Looks suspicious - check again
2473  continue;
2474  }
2475  if (tglp->locus.empty() || tglp->locus[0] == '\0' ||
2476  ! NStr::EqualNocase(glp->locus_tag, tglp->locus_tag))
2477  continue;
2478  glp->locus = tglp->locus;
2479  break;
2480  }
2481  }
2482 }
2483 
2484 /**********************************************************/
2486 {
2487  GeneListPtr glp;
2488  GeneListPtr glpprev;
2489  GeneListPtr glpnext;
2490  GeneListPtr tglp;
2491 
2492  if (! gnp || ! gnp->glp)
2493  return;
2494 
2495  for (glp = gnp->glp; glp; glp = glp->next) {
2496  if (glp->todel || ! glp->syn.empty() || (glp->fname != "misc_feature"))
2497  continue;
2498 
2499  bool got = false;
2500  for (tglp = gnp->glp; tglp; tglp = tglp->next) {
2501  if (tglp->todel || (tglp->fname == "misc_feature"))
2502  continue;
2503  if (! NStr::EqualNocase(glp->locus, tglp->locus) ||
2504  ! NStr::EqualNocase(glp->locus_tag, tglp->locus_tag))
2505  continue;
2506  if (tglp->syn.empty()) {
2507  got = true;
2508  continue;
2509  }
2510 
2511  sequence::ECompare cmp_res = sequence::Compare(*glp->loc, *tglp->loc, nullptr, sequence::fCompareOverlapping);
2512  if (cmp_res != sequence::eContained)
2513  continue;
2514 
2515  glp->todel = true;
2516  }
2517  if (glp->todel && got)
2518  glp->todel = false;
2519  }
2520 
2521  for (glpprev = nullptr, glp = gnp->glp; glp; glp = glpnext) {
2522  glpnext = glp->next;
2523  if (! glp->todel) {
2524  glp->loc.Reset();
2525  glpprev = glp;
2526  continue;
2527  }
2528 
2529  if (! glpprev)
2530  gnp->glp = glpnext;
2531  else
2532  glpprev->next = glpnext;
2533 
2534  glp->next = nullptr;
2535  GeneListFree(glp);
2536  }
2537 }
2538 
2539 /**********************************************************/
2540 static bool GeneLocusCheck(const TSeqFeatList& feats, bool diff_lt)
2541 {
2542  bool ret = true;
2543 
2544  for (TSeqFeatList::const_iterator feat = feats.begin(); feat != feats.end(); ++feat) {
2545  const CGene_ref& gene_ref1 = (*feat)->GetData().GetGene();
2546  if (! gene_ref1.IsSetLocus() || ! gene_ref1.IsSetLocus_tag())
2547  continue;
2548 
2549  TSeqFeatList::const_iterator feat_next = feat,
2550  feat_cur = feat;
2551  for (++feat_next; feat_next != feats.end(); ++feat_next, ++feat_cur) {
2552  const CGene_ref& gene_ref2 = (*feat_next)->GetData().GetGene();
2553 
2554  if (! gene_ref2.IsSetLocus() || ! gene_ref2.IsSetLocus_tag())
2555  continue;
2556 
2557  if (gene_ref1.GetLocus() != gene_ref2.GetLocus()) {
2558  feat = feat_cur;
2559  break;
2560  }
2561 
2562  if (gene_ref1.GetLocus_tag() == gene_ref2.GetLocus_tag())
2563  continue;
2564 
2565  string loc1_str, loc2_str;
2566 
2567  GetLocationStr((*feat)->GetLocation(), loc1_str);
2568  GetLocationStr((*feat_next)->GetLocation(), loc2_str);
2569 
2570  if (diff_lt == false) {
2573  "Multiple instances of the \"%s\" gene encountered: \"%s\"+\"%s\" : gene feature at \"%s\" : \"%s\"+\"%s\" : gene feature at \"%s\". Entry dropped.",
2574  gene_ref1.GetLocus().c_str(),
2575  gene_ref1.GetLocus().c_str(),
2576  gene_ref1.GetLocus_tag().c_str(),
2577  loc1_str.c_str(),
2578  gene_ref2.GetLocus().c_str(),
2579  gene_ref2.GetLocus_tag().c_str(),
2580  loc2_str.c_str());
2581  ret = false;
2582  } else
2585  "Multiple instances of the \"%s\" gene encountered: \"%s\"+\"%s\" : gene feature at \"%s\" : \"%s\"+\"%s\" : gene feature at \"%s\".",
2586  gene_ref1.GetLocus().c_str(),
2587  gene_ref1.GetLocus().c_str(),
2588  gene_ref1.GetLocus_tag().c_str(),
2589  loc1_str.c_str(),
2590  gene_ref2.GetLocus().c_str(),
2591  gene_ref2.GetLocus_tag().c_str(),
2592  loc2_str.c_str());
2593  }
2594  }
2595 
2596  return (ret);
2597 }
2598 
2599 /**********************************************************/
2600 static void CheckGene(TEntryList& seq_entries, ParserPtr pp, GeneRefFeats& gene_refs)
2601 {
2602  IndexblkPtr ibp;
2603  GeneNodePtr gnp;
2604  GeneListPtr glp;
2605 
2606  char* div;
2607 
2608  bool resort;
2609 
2610  if (! pp)
2611  return;
2612 
2613  ibp = pp->entrylist[pp->curindx];
2614  if (! ibp)
2615  return;
2616 
2617  div = ibp->division;
2618 
2619  gnp = new GeneNode;
2620  gnp->accver = pp->accver;
2621  gnp->circular = false;
2622  gnp->simple_genes = pp->simple_genes;
2623  gnp->got_misc = false;
2624  if (div && (StringEqu(div, "BCT") || StringEqu(div, "SYN")))
2625  gnp->skipdiv = true;
2626  else
2627  gnp->skipdiv = false;
2628 
2629  for (auto& entry : seq_entries) {
2630  for (CTypeIterator<CBioseq> bioseq(Begin(*entry)); bioseq; ++bioseq) {
2631  FindGene(*bioseq, gnp);
2632  }
2633 
2634  for (CTypeIterator<CBioseq_set> bio_set(Begin(*entry)); bio_set; ++bio_set) {
2635  if (bio_set->GetClass() == CBioseq_set::eClass_parts) /* parts, the place to put GeneRefPtr */
2636  {
2637  gnp->bioseq_set = &(*bio_set);
2638  gnp->flag = true;
2639  gnp->seg = true;
2640  break;
2641  }
2642  }
2643  }
2644 
2645  if (gnp->got_misc) {
2646  MiscFeatsWithoutGene(gnp);
2648  } else {
2649  for (glp = gnp->glp; glp; glp = glp->next) {
2650  glp->loc.Reset();
2651  }
2652  }
2653 
2654  if (gnp->glp) {
2655  gnp = sort_gnp_list(gnp);
2656 
2657  resort = false;
2658  if (LocusTagCheck(gnp->glp, resort) == false) {
2659  ibp->drop = true;
2660  GeneListFree(gnp->glp);
2661  CdssListFree(gnp->clp);
2662  GeneLocsFree(gnp->gelop);
2663  delete gnp;
2664 
2665  return;
2666  }
2667 
2668  if (resort)
2669  gnp = sort_gnp_list(gnp);
2670 
2671  ScannGeneName(gnp, gnp->bioseq ? gnp->bioseq->GetLength() : 0);
2672 
2673  if (GeneLocusCheck(gnp->feats, pp->diff_lt) == false) {
2674  ibp->drop = true;
2675  GeneListFree(gnp->glp);
2676  CdssListFree(gnp->clp);
2677  GeneLocsFree(gnp->gelop);
2678 
2679  delete gnp;
2680 
2681  return;
2682  }
2683 
2684  if (gnp->circular == false || ibp->got_plastid == false)
2685  GeneCheckForStrands(gnp->glp);
2686 
2687  if (! gnp->feats.empty()) {
2688  CBioseq::TAnnot* annots = nullptr;
2689  if (gnp->seg) {
2690  annots = &gnp->bioseq_set->SetAnnot();
2691  } else {
2692  annots = &gnp->bioseq->SetAnnot();
2693  }
2694 
2695  for (auto& cur_annot : *annots) {
2696  if (! cur_annot->IsFtable())
2697  continue;
2698 
2699  size_t advance = cur_annot->GetData().GetFtable().size();
2700  cur_annot->SetData().SetFtable().splice(cur_annot->SetData().SetFtable().end(), gnp->feats);
2701 
2702  gene_refs.first = cur_annot->SetData().SetFtable().begin();
2703  std::advance(gene_refs.first, advance);
2704  gene_refs.last = cur_annot->SetData().SetFtable().end();
2705  gene_refs.valid = true;
2706  break;
2707  }
2708 
2709  if (annots->empty()) {
2710  CRef<CSeq_annot> annot(new CSeq_annot);
2711  annot->SetData().SetFtable().assign(gnp->feats.begin(), gnp->feats.end());
2712 
2713  if (gnp->seg) {
2714  gnp->bioseq_set->SetAnnot().push_back(annot);
2715  } else {
2716  gnp->bioseq->SetAnnot().push_back(annot);
2717  }
2718 
2719  gene_refs.first = annot->SetData().SetFtable().begin();
2720  gene_refs.last = annot->SetData().SetFtable().end();
2721  gene_refs.valid = true;
2722  }
2723  }
2724 
2725  GeneListFree(gnp->glp);
2726  gnp->glp = nullptr;
2727  }
2728 
2729  CdssListFree(gnp->clp);
2730  GeneLocsFree(gnp->gelop);
2731 
2732  delete gnp;
2733 }
2734 
2736  const CSeq_loc& loc1,
2737  const CSeq_loc& loc2,
2738  CScope* scope)
2739 {
2740  const auto strand1 = loc1.GetStrand() == eNa_strand_minus ? eNa_strand_minus : eNa_strand_plus;
2741  const auto strand2 = loc2.GetStrand() == eNa_strand_minus ? eNa_strand_minus : eNa_strand_plus;
2742  if (strand1 != strand2) {
2743  return false;
2744  }
2745  if (loc1.IsInt() && loc2.IsInt()) {
2746  const auto& intv1 = loc1.GetInt();
2747  const auto& intv2 = loc2.GetInt();
2748  return (intv1.GetFrom() >= intv2.GetFrom() && intv1.GetTo() <= intv2.GetTo());
2749  }
2750  auto compResult = sequence::Compare(
2751  loc1, loc2, nullptr, sequence::fCompareOverlapping);
2752  return (compResult == sequence::eContained || compResult == sequence::eSame);
2753 }
2754 
2755 
2756 /**********************************************************
2757  *
2758  * SeqFeatXrefPtr GetXrpForOverlap(glap, sfp, gerep):
2759  *
2760  * Get xrp from list by locus only if cur gene overlaps
2761  * other gene and asn2ff cannot find it.
2762  *
2763  **********************************************************/
2765  const char* acnum,
2766  GeneRefFeats& gene_refs,
2767  const TSeqLocInfoList& llocs,
2768  const CSeq_feat& feat,
2769  CGene_ref& gerep)
2770 {
2771  int count = 0;
2772  /*
2773  ENa_strand strand = feat.GetLocation().IsSetStrand() ? feat.GetLocation().GetStrand() : eNa_strand_unknown;
2774  if (strand == eNa_strand_other)
2775  strand = eNa_strand_unknown;
2776  */
2777 
2778  CConstRef<CGene_ref> gene_ref;
2779 
2780  TSeqLocInfoList::const_iterator cur_loc = llocs.begin();
2781  CRef<CSeq_loc> loc = fta_seqloc_local(feat.GetLocation(), acnum); // passed as consts
2782 
2783  bool stopped = false;
2784  if (gene_refs.valid) {
2785  for (auto cur_feat = gene_refs.first; cur_feat != gene_refs.last; ++cur_feat) {
2786  if (! GenelocContained(*loc, *cur_loc->loc, nullptr)) {
2787  ++cur_loc;
2788  continue; /* f location is within sfp one */
2789  }
2790 
2791  count++;
2792  if (gene_ref.Empty()) {
2793  gene_ref.Reset(&(*cur_feat)->GetData().GetGene());
2794  } else if (fta_cmp_gene_refs(*gene_ref, (*cur_feat)->GetData().GetGene())) {
2795  stopped = true;
2796  break;
2797  }
2798 
2799  ++cur_loc;
2800  }
2801  }
2802 
2803  CRef<CSeqFeatXref> xref;
2804 
2805  if (count == 0 || (! stopped && gene_ref.NotEmpty() && fta_cmp_gene_refs(*gene_ref, gerep) == 0))
2806  return xref;
2807 
2808  xref.Reset(new CSeqFeatXref);
2809  xref->SetData().SetGene(gerep);
2810 
2811  return xref;
2812 }
2813 
2814 static void FixAnnot(CBioseq::TAnnot& annots, const char* acnum, GeneRefFeats& gene_refs, TSeqLocInfoList& llocs)
2815 {
2816  for (CBioseq::TAnnot::iterator annot = annots.begin(); annot != annots.end();) {
2817  if (! (*annot)->IsSetData() || ! (*annot)->GetData().IsFtable()) {
2818  ++annot;
2819  continue;
2820  }
2821 
2822  CSeq_annot::C_Data::TFtable& feat_table = (*annot)->SetData().SetFtable();
2823  for (TSeqFeatList::iterator feat = feat_table.begin(); feat != feat_table.end();) {
2824  if ((*feat)->IsSetData() && (*feat)->GetData().IsImp()) {
2825  const CImp_feat& imp = (*feat)->GetData().GetImp();
2826  if (imp.GetKey() == "gene") {
2827  feat = feat_table.erase(feat);
2828  continue;
2829  }
2830  }
2831 
2832  char* gene = (*feat)->IsSetQual() ? GetTheQualValue((*feat)->SetQual(), "gene") : nullptr;
2833  char* locus_tag = (*feat)->IsSetQual() ? GetTheQualValue((*feat)->SetQual(), "locus_tag") : nullptr;
2834  if (! gene && ! locus_tag) {
2835  ++feat;
2836  continue;
2837  }
2838 
2839  CRef<CGene_ref> gene_ref(new CGene_ref);
2840  if (gene)
2841  gene_ref->SetLocus(gene);
2842  if (locus_tag)
2843  gene_ref->SetLocus_tag(locus_tag);
2844 
2845  TSynSet syns;
2846  GetGeneSyns((*feat)->GetQual(), gene, syns);
2847  if (! syns.empty())
2848  gene_ref->SetSyn().assign(syns.begin(), syns.end());
2849 
2850  CRef<CSeqFeatXref> xref = GetXrpForOverlap(acnum, gene_refs, llocs, *(*feat), *gene_ref);
2851  if (xref.NotEmpty())
2852  (*feat)->SetXref().push_back(xref);
2853 
2854  DeleteQual((*feat)->SetQual(), "gene");
2855  DeleteQual((*feat)->SetQual(), "locus_tag");
2856  DeleteQual((*feat)->SetQual(), "gene_synonym");
2857 
2858  if ((*feat)->GetQual().empty())
2859  (*feat)->ResetQual();
2860  ++feat;
2861 
2862  if (gene) {
2863  MemFree(gene);
2864  }
2865  }
2866 
2867  if (feat_table.empty())
2868  annot = annots.erase(annot);
2869  else
2870  ++annot;
2871  }
2872 }
2873 
2874 /**********************************************************
2875  *
2876  * GeneQuals:
2877  * -- find match_gene Gene-ref for qual /gene
2878  * -- find best_gene Gene-ref
2879  * remove qual
2880  * make Xref if best_gene and match_gene don't match
2881  * remove misc_feat 'gene'
2882  *
2883  **********************************************************/
2884 static void GeneQuals(TEntryList& seq_entries, const char* acnum, GeneRefFeats& gene_refs)
2885 {
2886  TSeqLocInfoList llocs;
2887  if (gene_refs.valid) {
2888  for (TSeqFeatList::iterator feat = gene_refs.first; feat != gene_refs.last; ++feat) {
2889  SeqLocInfo info;
2890  info.strand = (*feat)->GetLocation().IsSetStrand() ? (*feat)->GetLocation().GetStrand() : eNa_strand_unknown;
2891 
2892  if (info.strand == eNa_strand_other)
2893  info.strand = eNa_strand_unknown;
2894 
2895  info.loc = fta_seqloc_local((*feat)->GetLocation(), acnum);
2896  llocs.push_back(info);
2897  }
2898  }
2899 
2900  for (auto& entry : seq_entries) {
2901  for (CTypeIterator<CBioseq_set> bio_set(Begin(*entry)); bio_set; ++bio_set) {
2902  if (bio_set->IsSetAnnot())
2903  FixAnnot(bio_set->SetAnnot(), acnum, gene_refs, llocs);
2904  }
2905 
2906  for (CTypeIterator<CBioseq> bioseq(Begin(*entry)); bioseq; ++bioseq) {
2907  if (bioseq->IsSetAnnot())
2908  FixAnnot(bioseq->SetAnnot(), acnum, gene_refs, llocs);
2909  }
2910  }
2911 }
2912 
2913 /**********************************************************/
2914 static void fta_collect_genes(const CBioseq& bioseq, std::set<string>& genes)
2915 {
2916  for (const auto& annot : bioseq.GetAnnot()) {
2917  if (! annot->IsFtable())
2918  continue;
2919 
2920  for (const auto& feat : annot->GetData().GetFtable()) {
2921  for (const auto& qual : feat->GetQual()) {
2922  if (! qual->IsSetQual() || qual->GetQual() != "gene" ||
2923  ! qual->IsSetVal() || qual->GetVal().empty())
2924  continue;
2925 
2926  genes.insert(qual->GetVal());
2927  }
2928  }
2929  }
2930 }
2931 
2932 /**********************************************************/
2933 static void fta_fix_labels(CBioseq& bioseq, const std::set<string>& genes)
2934 {
2935  if (! bioseq.IsSetAnnot())
2936  return;
2937 
2938  for (auto& annot : bioseq.SetAnnot()) {
2939  if (! annot->IsFtable())
2940  continue;
2941 
2942  for (auto& feat : annot->SetData().SetFtable()) {
2943 
2944  if (! feat->IsSetQual())
2945  continue;
2946 
2947  for (CSeq_feat::TQual::iterator qual = feat->SetQual().begin(); qual != feat->SetQual().end(); ++qual) {
2948  if (! (*qual)->IsSetQual() || (*qual)->GetQual() != "label" ||
2949  ! (*qual)->IsSetVal() || (*qual)->GetVal().empty())
2950  continue;
2951 
2952  const string& cur_val = (*qual)->GetVal();
2953  std::set<string>::const_iterator ci = genes.lower_bound(cur_val);
2954  if (*ci == cur_val) {
2955  CRef<CGb_qual> new_qual(new CGb_qual);
2956  new_qual->SetQual("gene");
2957  new_qual->SetVal(cur_val);
2958 
2959  feat->SetQual().insert(qual, new_qual);
2960  }
2961  }
2962  }
2963  }
2964 }
2965 
2966 /**********************************************************/
2967 void DealWithGenes(TEntryList& seq_entries, ParserPtr pp)
2968 {
2969  if (pp->source == Parser::ESource::Flybase) {
2970  std::set<string> genes;
2971  for (const auto& entry : seq_entries) {
2972  for (CBioseq_CI bioseq(GetScope(), *entry); bioseq; ++bioseq) {
2973  fta_collect_genes(*bioseq->GetCompleteBioseq(), genes);
2974  }
2975  }
2976 
2977  if (! genes.empty()) {
2978  for (auto& entry : seq_entries) {
2979  for (CTypeIterator<CBioseq> bioseq(Begin(*entry)); bioseq; ++bioseq) {
2980  fta_fix_labels(*bioseq, genes);
2981  }
2982  }
2983  }
2984  }
2985 
2986  /* make GeneRefBlk if any gene qualifier exists
2987  */
2988  GeneRefFeats gene_refs;
2989  gene_refs.valid = false;
2990  CheckGene(seq_entries, pp, gene_refs);
2991 
2992  if (gene_refs.valid) {
2993  for (TSeqFeatList::iterator feat = gene_refs.first; feat != gene_refs.last; ++feat) {
2994  if ((*feat)->IsSetLocation()) {
2995  int partial = sequence::SeqLocPartialCheck((*feat)->GetLocation(), &GetScope());
2996  if (partial & sequence::eSeqlocPartial_Start ||
2997  partial & sequence::eSeqlocPartial_Stop) // not internal
2998  (*feat)->SetPartial(true);
2999 
3000  if (! pp->genenull || ! (*feat)->GetLocation().IsMix())
3001  continue;
3002 
3003  CSeq_loc_mix& mix_loc = (*feat)->SetLocation().SetMix();
3004 
3005  CRef<CSeq_loc> null_loc(new CSeq_loc);
3006  null_loc->SetNull();
3007 
3008  CSeq_loc_mix::Tdata::iterator it_loc = mix_loc.Set().begin();
3009  ++it_loc;
3010  for (; it_loc != mix_loc.Set().end(); ++it_loc) {
3011  it_loc = mix_loc.Set().insert(it_loc, null_loc);
3012  ++it_loc;
3013  }
3014  }
3015  }
3016  }
3017 
3018  ProcNucProt(pp, seq_entries, gene_refs);
3019 
3020  /* remove /gene if they can be mapped to GenRef
3021  */
3022  if (! seq_entries.empty())
3023  GeneQuals(seq_entries, pp->entrylist[pp->curindx]->acnum, gene_refs);
3024 }
3025 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool fta_strings_same(const char *s1, const char *s2)
Definition: add.cpp:891
bool IsSegBioseq(const CSeq_id &id)
Definition: asci_blk.cpp:2506
#define false
Definition: bool.h:36
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
Definition: Dbtag.hpp:53
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
@Imp_feat.hpp User-defined methods of the data storage class.
Definition: Imp_feat.hpp:54
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CScope –.
Definition: scope.hpp:92
CSeqFeatXref –.
Definition: SeqFeatXref.hpp:66
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
void AddSeqLoc(const CSeq_loc &other)
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
static const char location[]
Definition: config.c:97
static char * join(int argc, char *argv[], const char sep[])
Definition: dbpivot.c:359
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
#define ERR_FEATURE_InconsistentPseudogene
Definition: flat2err.h:387
#define ERR_FEATURE_InconsistentLocusTagAndGene
Definition: flat2err.h:353
#define ERR_FEATURE_MultipleOldLocusTags
Definition: flat2err.h:390
#define ERR_FEATURE_MultipleWBGeneXrefs
Definition: flat2err.h:388
#define ERR_GENEREF_BothStrands
Definition: flat2err.h:459
#define ERR_FEATURE_MultipleGenesDifferentLocusTags
Definition: flat2err.h:385
#define ERR_GENEREF_NoUniqMaploc
Definition: flat2err.h:458
list< CRef< objects::CSeq_entry > > TEntryList
std::list< CRef< objects::CSeq_id > > TSeqIdList
Definition: ftablock.h:57
std::list< CRef< objects::CSeq_feat > > TSeqFeatList
Definition: ftablock.h:55
bool StringEquN(const char *s1, const char *s2, size_t n)
Definition: ftacpp.hpp:106
bool StringEqu(const char *s1, const char *s2)
Definition: ftacpp.hpp:96
void MemFree(char *p)
Definition: ftacpp.hpp:55
const char * leave_imp_feat[]
Definition: genref.cpp:221
static GeneLocsPtr fta_sort_feat_list(GeneLocsPtr gelop)
Definition: genref.cpp:2116
static bool WeDontNeedToJoinThis(const CSeqFeatData &data)
Definition: genref.cpp:1808
USING_SCOPE(objects)
static void GetGeneSyns(const TQualVector &quals, const char *name, TSynSet &syns)
Definition: genref.cpp:1845
static void fta_add_olt(GeneListPtr fromglp, GeneListPtr toglp)
Definition: genref.cpp:987
static void ScannGeneName(GeneNodePtr gnp, Int4 seqlen)
Definition: genref.cpp:1602
static Int4 fta_cmp_gene_syns(const TSynSet &syn1, const TSynSet &syn2)
Definition: genref.cpp:323
static bool fta_rnas_cds_feat(const CSeq_feat &feat)
Definition: genref.cpp:1870
const char * feat_no_gene[]
Definition: genref.cpp:219
static CRef< CSeq_loc > MakeCLoc(MixLocPtr mlp, bool noleft, bool noright)
Definition: genref.cpp:1366
void DealWithGenes(TEntryList &seq_entries, ParserPtr pp)
Definition: genref.cpp:2967
bool GenelocContained(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Definition: genref.cpp:2735
static void MessWithSegGenes(GeneNodePtr gnp)
Definition: genref.cpp:1032
static void GeneLocsFree(GeneLocsPtr gelop)
Definition: genref.cpp:586
static void SrchGene(CSeq_annot::C_Data::TFtable &feats, GeneNodePtr gnp, Int4 length, const CSeq_id &id)
Definition: genref.cpp:2197
static MixLocPtr MixLocCopy(MixLocPtr mlp)
Definition: genref.cpp:812
Int2 leave_rna_feat[]
Definition: genref.cpp:230
static void fta_collect_genes(const CBioseq &bioseq, std::set< string > &genes)
Definition: genref.cpp:2914
static void CheckGene(TEntryList &seq_entries, ParserPtr pp, GeneRefFeats &gene_refs)
Definition: genref.cpp:2600
static void GeneCheckForStrands(const GeneListPtr _glp)
Definition: genref.cpp:2367
static void GeneListFree(GeneListPtr glp)
Definition: genref.cpp:537
static void GetLocationStr(const CSeq_loc &loc, string &str)
Definition: genref.cpp:239
static bool GetFeatNameAndLoc(GeneListPtr glp, const CSeq_feat &feat, GeneNodePtr gnp)
Definition: genref.cpp:1913
static void fta_append_feat_list(GeneNodePtr gnp, const CSeq_loc *location, const char *gene, const char *locus_tag)
Definition: genref.cpp:2081
static Int4 fta_cmp_locusyn(GeneListPtr glp1, GeneListPtr glp2)
Definition: genref.cpp:385
static bool CompareGeneListName(const GeneListPtr &sp1, const GeneListPtr &sp2)
Definition: genref.cpp:417
static void fta_make_seq_int(MixLocPtr mlp, bool noleft, bool noright, CSeq_interval &interval)
Definition: genref.cpp:1331
static CdssListPtr SrchCdss(CSeq_annot::C_Data::TFtable &feats, CdssListPtr clp, Int4 segnum, const CSeq_id &id)
Definition: genref.cpp:2286
static bool fta_check_feat_overlap(GeneLocsPtr gelop, GeneListPtr c, MixLocPtr mlp, Int4 from, Int4 to)
Definition: genref.cpp:1113
static void SortMixLoc(GeneListPtr c)
Definition: genref.cpp:1533
static void fta_fix_labels(CBioseq &bioseq, const std::set< string > &genes)
Definition: genref.cpp:2933
static void fta_add_wormbase(GeneListPtr fromglp, GeneListPtr toglp)
Definition: genref.cpp:980
static bool DoWeHaveCdssInBetween(GeneListPtr c, Int4 to, CdssListPtr clp)
Definition: genref.cpp:721
static void RemoveUnneededMiscFeats(GeneNodePtr gnp)
Definition: genref.cpp:2485
std::set< string > TWormbaseSet
Definition: genref.cpp:78
static void fta_collect_wormbases(GeneListPtr glp, CSeq_feat &feat)
Definition: genref.cpp:2145
static void FixMixLoc(GeneListPtr c, GeneLocsPtr gelop)
Definition: genref.cpp:1187
static GeneNodePtr sort_gnp_list(GeneNodePtr gnp)
Definition: genref.cpp:496
static bool CompareGeneLocsMinMax(const GeneLocsPtr &sp1, const GeneLocsPtr &sp2)
Definition: genref.cpp:2106
static void fta_seqloc_del_far(CSeq_loc &locs, const Char *acnum, const CSeq_id *id)
Definition: genref.cpp:273
static void CircularSeqLocFormat(GeneListPtr c)
Definition: genref.cpp:1428
static Int4 fta_cmp_gene_refs(const CGene_ref &grp1, const CGene_ref &grp2)
Definition: genref.cpp:347
static SeqlocInfoblkPtr GetLowHighFromSeqLoc(const CSeq_loc *origslp, Int4 length, const CSeq_id &orig_id)
Definition: genref.cpp:603
static bool fta_seqid_same(const CSeq_id &sid, const Char *acnum, const CSeq_id *id)
Definition: genref.cpp:246
std::set< string > TSynSet
Definition: genref.cpp:77
static bool LocusTagCheck(GeneListPtr glp, bool &resort)
Definition: genref.cpp:2396
static void GeneQuals(TEntryList &seq_entries, const char *acnum, GeneRefFeats &gene_refs)
Definition: genref.cpp:2884
static void fta_check_pseudogene(GeneListPtr tglp, GeneListPtr glp)
Definition: genref.cpp:994
static void fta_make_seq_pnt(MixLocPtr mlp, bool noleft, bool noright, CSeq_point &point)
Definition: genref.cpp:1351
static void MixLocFree(MixLocPtr mlp)
Definition: genref.cpp:526
static bool ConfirmCircular(MixLocPtr mlp)
Definition: genref.cpp:1159
static bool s_IdsMatch(const CRef< CSeq_id > &pId1, const CRef< CSeq_id > &pId2)
Definition: genref.cpp:826
static MixLocPtr EasySeqLocMerge(MixLocPtr first, MixLocPtr second, bool join)
Definition: genref.cpp:840
static Int2 GetMergeOrder(MixLocPtr first, MixLocPtr second)
Definition: genref.cpp:1397
static CRef< CSeq_loc > fta_seqloc_local(const CSeq_loc &orig, const Char *acnum)
Definition: genref.cpp:311
static void fta_collect_olts(GeneListPtr glp, CSeq_feat &feat)
Definition: genref.cpp:2169
static bool GeneLocusCheck(const TSeqFeatList &feats, bool diff_lt)
Definition: genref.cpp:2540
std::set< string > TLocusTagSet
Definition: genref.cpp:79
static void CdssListFree(CdssListPtr clp)
Definition: genref.cpp:574
static void FixAnnot(CBioseq::TAnnot &annots, const char *acnum, GeneRefFeats &gene_refs, TSeqLocInfoList &llocs)
Definition: genref.cpp:2814
static MixLocPtr CircularSeqLocCollect(MixLocPtr first, MixLocPtr second)
Definition: genref.cpp:958
static CRef< CSeq_id > CpSeqIdAcOnly(const CSeq_id &id, bool accver)
Definition: genref.cpp:1792
static void FindGene(CBioseq &bioseq, GeneNodePtr gene_node)
Definition: genref.cpp:2322
static list< AccMinMax > fta_get_acc_minmax_strand(const CSeq_loc *location, GeneLocsPtr gelop)
Definition: genref.cpp:2000
static void AddGeneFeat(GeneListPtr glp, const string &maploc, TSeqFeatList &feats)
Definition: genref.cpp:736
static bool DoWeHaveGeneInBetween(GeneListPtr c, SeqlocInfoblkPtr second, GeneNodePtr gnp)
Definition: genref.cpp:688
static bool IfCDSGeneFeat(const CSeq_feat &feat, Uint1 choice, const char *key)
Definition: genref.cpp:1899
static void MiscFeatsWithoutGene(GeneNodePtr gnp)
Definition: genref.cpp:2457
static CRef< CSeqFeatXref > GetXrpForOverlap(const char *acnum, GeneRefFeats &gene_refs, const TSeqLocInfoList &llocs, const CSeq_feat &feat, CGene_ref &gerep)
Definition: genref.cpp:2764
list< SeqLocInfo > TSeqLocInfoList
Definition: genref.cpp:76
#define SEV_WARNING
Definition: gicache.c:90
#define SEV_ERROR
Definition: gicache.c:91
#define SEV_REJECT
Definition: gicache.c:92
#define ErrPostEx(sev, err_code,...)
Definition: ncbierr.hpp:78
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2144
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2039
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
Definition: Seq_id.cpp:411
@ e_NO
different SeqId types-can't compare
Definition: Seq_id.hpp:550
@ e_YES
SeqIds compared, but are different.
Definition: Seq_id.hpp:551
void SetMix(TMix &v)
Definition: Seq_loc.hpp:987
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
void SetPnt(TPnt &v)
Definition: Seq_loc.hpp:985
const_iterator end(void) const
Definition: Seq_loc.cpp:1034
const_iterator begin(void) const
Definition: Seq_loc.cpp:1028
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
CRef< CSeq_loc > Subtract(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper, ILengthGetter *len_getter) const
Subtract seq-loc from this, merge/sort resulting ranges depending on flags.
Definition: Seq_loc.cpp:5087
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
Definition: Seq_loc.cpp:858
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
Definition: Seq_loc.cpp:3467
void SetNull(void)
Override all setters to incorporate cache invalidation.
Definition: Seq_loc.hpp:960
@ eEmpty_Allow
ignore empty locations
Definition: Seq_loc.hpp:458
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
Definition: iterator.hpp:1004
int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
ECompare
@ eSeqlocPartial_Stop
@ eSeqlocPartial_Start
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:1392
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
char Char
Alias for char.
Definition: ncbitype.h:93
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
Definition: ncbistr.hpp:5325
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
TSyn & SetSyn(void)
Assign a value to Syn data member.
Definition: Gene_ref_.hpp:774
bool IsSetSyn(void) const
Synonyms for locus. Check if a value has been assigned to Syn data member.
Definition: Gene_ref_.hpp:756
const TSyn & GetSyn(void) const
Get the Syn member data.
Definition: Gene_ref_.hpp:768
bool IsSetLocus_tag(void) const
Systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data member.
Definition: Gene_ref_.hpp:781
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
void SetLocus(const TLocus &value)
Assign a value to Locus data member.
Definition: Gene_ref_.hpp:514
void SetLocus_tag(const TLocus_tag &value)
Assign a value to Locus_tag data member.
Definition: Gene_ref_.hpp:802
void SetMaploc(const TMaploc &value)
Assign a value to Maploc data member.
Definition: Gene_ref_.hpp:655
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
bool IsLim(void) const
Check if variant Lim is selected.
Definition: Int_fuzz_.hpp:636
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642
ELim
some limit value
Definition: Int_fuzz_.hpp:209
@ eLim_gt
greater than
Definition: Int_fuzz_.hpp:211
@ eLim_lt
less than
Definition: Int_fuzz_.hpp:212
TType GetType(void) const
Get the Type member data.
Definition: RNA_ref_.hpp:529
bool IsSetExt(void) const
Generic fields for ncRNA, tmRNA, miscRNA. Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
Definition: RNA_ref_.hpp:510
const TName & GetName(void) const
Get the variant data.
Definition: RNA_ref_.hpp:484
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
bool IsName(void) const
Check if variant Name is selected.
Definition: RNA_ref_.hpp:478
void SetQual(const TQual &value)
Assign a value to Qual data member.
Definition: Gb_qual_.hpp:221
const TKey & GetKey(void) const
Get the Key member data.
Definition: Imp_feat_.hpp:259
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
Definition: Seq_feat_.hpp:1339
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
bool IsSetQual(void) const
Qualifiers. Check if a value has been assigned to Qual data member.
Definition: Seq_feat_.hpp:1135
E_Choice Which(void) const
Which variant is currently selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
bool IsImp(void) const
Check if variant Imp is selected.
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
Definition: Imp_feat_.hpp:247
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
void SetVal(const TVal &value)
Assign a value to Val data member.
Definition: Gb_qual_.hpp:268
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
Definition: Seq_feat_.hpp:1374
const TRna & GetRna(void) const
Get the variant data.
void ResetDbxref(void)
Reset Dbxref data member.
Definition: Seq_feat_.cpp:188
bool IsSetDbxref(void) const
Support for xref to other databases. Check if a value has been assigned to Dbxref data member.
Definition: Seq_feat_.hpp:1321
TQual & SetQual(void)
Assign a value to Qual data member.
Definition: Seq_feat_.hpp:1153
bool IsRna(void) const
Check if variant Rna is selected.
void ResetQual(void)
Reset Qual data member.
Definition: Seq_feat_.cpp:136
const TImp & GetImp(void) const
Get the variant data.
bool IsSetLocation(void) const
The location this feature is made from. Check if a value has been assigned to Location data member.
Definition: Seq_feat_.hpp:1105
void SetTo(TTo value)
Assign a value to To data member.
const TFuzz_from & GetFuzz_from(void) const
Get the Fuzz_from member data.
void SetPoint(TPoint value)
Assign a value to Point data member.
Definition: Seq_point_.hpp:312
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_point_.cpp:61
bool IsSetId(void) const
WARNING: this used to be optional. Check if a value has been assigned to Id data member.
Definition: Seq_point_.hpp:378
bool IsMix(void) const
Check if variant Mix is selected.
Definition: Seq_loc_.hpp:552
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const TId & GetId(void) const
Get the Id member data.
bool IsSetPoint(void) const
Check if a value has been assigned to Point data member.
Definition: Seq_point_.hpp:284
bool IsPacked_pnt(void) const
Check if variant Packed_pnt is selected.
Definition: Seq_loc_.hpp:546
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
const TFuzz_to & GetFuzz_to(void) const
Get the Fuzz_to member data.
bool IsSetA(void) const
Connection to at least one residue. Check if a value has been assigned to A data member.
Definition: Seq_bond_.hpp:201
void SetId(TId &value)
Assign a value to Id data member.
void SetStrand(TStrand value)
Assign a value to Strand data member.
Definition: Seq_point_.hpp:359
TFrom GetFrom(void) const
Get the From member data.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
Definition: Seq_point_.cpp:71
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
Definition: Seq_point_.hpp:408
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
Definition: Seq_point_.hpp:420
const TId & GetId(void) const
Get the Id member data.
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_point_.hpp:390
void SetFrom(TFrom value)
Assign a value to From data member.
TStrand GetStrand(void) const
Get the Strand member data.
Definition: Seq_point_.hpp:350
bool IsBond(void) const
Check if variant Bond is selected.
Definition: Seq_loc_.hpp:564
const TPacked_pnt & GetPacked_pnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:260
void SetFuzz_to(TFuzz_to &value)
Assign a value to Fuzz_to data member.
void SetFuzz_from(TFuzz_from &value)
Assign a value to Fuzz_from data member.
const TA & GetA(void) const
Get the A member data.
Definition: Seq_bond_.hpp:213
bool IsSetTo(void) const
Check if a value has been assigned to To data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
Tdata & Set(void)
Assign a value to data member.
bool IsSetFuzz_to(void) const
Check if a value has been assigned to Fuzz_to data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
Definition: Seq_point_.hpp:331
TStrand GetStrand(void) const
Get the Strand member data.
bool IsSetId(void) const
WARNING: this used to be optional. Check if a value has been assigned to Id data member.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
bool IsSetFuzz_from(void) const
Check if a value has been assigned to Fuzz_from data member.
void SetStrand(TStrand value)
Assign a value to Strand data member.
bool IsPnt(void) const
Check if variant Pnt is selected.
Definition: Seq_loc_.hpp:540
const TBond & GetBond(void) const
Get the variant data.
Definition: Seq_loc_.cpp:326
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_other
Definition: Na_strand_.hpp:70
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
@ eClass_parts
parts for 2 or 3
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
TTopology GetTopology(void) const
Get the Topology member data.
Definition: Seq_inst_.hpp:733
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
Definition: Bioseq_.hpp:354
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:366
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
list< CRef< CSeq_feat > > TFtable
Definition: Seq_annot_.hpp:193
list< CRef< CSeq_annot > > TAnnot
Definition: Bioseq_.hpp:97
where both are integers. Parameter "tse" (optional, String): TSE option controls what blob is orig... (description truncated in the generated tooltip)
int i
if(yy_accept[yy_current_state])
static MDB_envinfo info
Definition: mdb_load.c:37
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::KEY key
const char * tag
#define nullptr
Definition: ncbimisc.hpp:45
T max(T x_, T y_)
T min(T x_, T y_)
void ProcNucProt(ParserPtr pp, TEntryList &seq_entries, GeneRefFeats &gene_refs)
Definition: nucprot.cpp:2522
static const char * str(char *buf, int n)
Definition: stats.c:84
Int4 max
Definition: genref.cpp:87
CRef< CSeq_id > pId
Definition: genref.cpp:82
Int4 min
Definition: genref.cpp:86
Int4 segnum
Definition: genref.cpp:169
Int4 to
Definition: genref.cpp:168
Int4 from
Definition: genref.cpp:167
CdssList * next
Definition: genref.cpp:170
SeqlocInfoblkPtr slibp
Definition: genref.cpp:140
Int4 segnum
Definition: genref.cpp:141
string pseudogene
Definition: genref.cpp:134
CRef< CSeq_feat > feat
Definition: genref.cpp:138
TLocusTagSet olt
Definition: genref.cpp:150
bool circular
Definition: genref.cpp:160
bool noright
Definition: genref.cpp:156
string locus
Definition: genref.cpp:131
bool pseudo
Definition: genref.cpp:152
TSynSet syn
Definition: genref.cpp:148
bool noleft
Definition: genref.cpp:155
TWormbaseSet wormbase
Definition: genref.cpp:149
string maploc
Definition: genref.cpp:135
Uint1 leave
Definition: genref.cpp:142
MixLocPtr mlp
Definition: genref.cpp:146
string location
Definition: genref.cpp:158
bool genefeat
Definition: genref.cpp:154
bool allpseudo
Definition: genref.cpp:153
string locus_tag
Definition: genref.cpp:133
CRef< CSeq_loc > loc
Definition: genref.cpp:144
bool todel
Definition: genref.cpp:159
GeneList * next
Definition: genref.cpp:161
string fname
Definition: genref.cpp:157
string gene
Definition: genref.cpp:93
Int4 verymin
Definition: genref.cpp:96
GeneLocs * next
Definition: genref.cpp:99
Int4 verymax
Definition: genref.cpp:97
list< AccMinMax > ammp
Definition: genref.cpp:98
string locus
Definition: genref.cpp:94
Int4 strand
Definition: genref.cpp:95
bool accver
Definition: genref.cpp:188
CdssListPtr clp
Definition: genref.cpp:190
CBioseq_set * bioseq_set
Definition: genref.cpp:181
TSeqFeatList feats
Definition: genref.cpp:185
GeneLocsPtr gelop
Definition: genref.cpp:192
bool seg
Definition: genref.cpp:178
Int4 segindex
Definition: genref.cpp:187
bool circular
Definition: genref.cpp:191
GeneListPtr glp
Definition: genref.cpp:183
bool flag
Definition: genref.cpp:176
bool skipdiv
Definition: genref.cpp:189
CBioseq * bioseq
Definition: genref.cpp:180
bool simple_genes
Definition: genref.cpp:193
bool got_misc
Definition: genref.cpp:194
GeneNode()
Definition: genref.cpp:197
TSeqFeatList::iterator first
Definition: nucprot.h:65
TSeqFeatList::iterator last
Definition: nucprot.h:66
bool valid
Definition: nucprot.h:64
Char division[4]
Definition: ftablock.h:174
bool drop
Definition: ftablock.h:185
bool got_plastid
Definition: ftablock.h:236
bool noright
Definition: genref.cpp:123
bool noleft
Definition: genref.cpp:122
Int4 max
Definition: genref.cpp:120
Int4 min
Definition: genref.cpp:119
MixLoc * next
Definition: genref.cpp:125
ENa_strand strand
Definition: genref.cpp:121
Int4 numint
Definition: genref.cpp:124
CRef< CSeq_id > pId
Definition: genref.cpp:118
vector< IndexblkPtr > entrylist
ENa_strand strand
Definition: genref.cpp:73
CRef< CSeq_loc > loc
Definition: genref.cpp:72
TSeqIdList ids
Definition: genref.cpp:110
bool noright
Definition: genref.cpp:112
ENa_strand strand
Definition: genref.cpp:108
#define _ASSERT
CScope & GetScope()
bool DeleteQual(TQualVector &qlist, const Char *qual)
Definition: utilfeat.cpp:190
char * GetTheQualValue(TQualVector &qlist, const Char *qual)
Definition: utilfeat.cpp:157
string CpTheQualValue(const TQualVector &qlist, const Char *qual)
Definition: utilfeat.cpp:128
void MakeLocStrCompatible(string &str)
Definition: utilfeat.cpp:464
Int2 MatchArrayString(const char **array, const char *text)
Definition: utilfun.cpp:615
std::vector< CRef< objects::CGb_qual > > TQualVector
Definition: xgbfeat.h:12
Modified on Wed Mar 27 11:17:03 2024 by modify_doxy.py rev. 669887