NCBI C++ ToolKit
cuCdCore.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuCdCore.hpp 102299 2024-04-19 11:25:14Z lanczyck $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Adapted from CDTree-1 code by Chris Lanczycki
27  *
28  * File Description:
29  *
30  * Subclass of CCdd object.
31  * Originally forked from the CDTree3 CCd class.
32  *
33  * ===========================================================================
34  */
35 
36 #ifndef CU_CDCORE_HPP
37 #define CU_CDCORE_HPP
38 
42 #include <map>
43 
46 BEGIN_SCOPE(cd_utils)
47 
51 };
52 
53 const int PENDING_ROW_START = 1000000;
54 
56 {
57 public:
58  enum AlignmentSrc {
59  NORMAL_ALIGNMENT = 0,
60  PENDING_ALIGNMENT
61  };
63  USE_NORMAL_ALIGNMENT=0,
65  USE_ALL_ALIGNMENT
66  };
67 
68  CCdCore(void); // constructor
69  virtual ~CCdCore(void); // destructor
70 
71  /* CD identifier methods */
72  string GetAccession(int& Version) const; // get accession and version of CD
73  string GetAccession() const;
74  void SetAccession(string Accession, int Version); // set accession and version of CD
75  void SetAccession(string Accession);
76  void EraseUID(); // erase CD's uid
77  int GetUID() const; // return the first 'uid' found, or '0' if none exist.
78  // (this is the PSSM_id for a published CD)
79 
80  bool HasCddId(const CCdd_id& id) const; // is 'id' an identifier for this CD
81 
82  /* Basic information about CD */
83  string GetLongDescription(); // long description of CD
84  string GetUpdateDate(); // last update date of CD
85  int GetNumRows() const; // number of rows in CD
86  int GetNumSequences() const; // number of sequences in CD
87  int GetNumRowsWithSequences() const; // number of rows with a valid sequence index
88  int GetAlignmentLength() const; // total number of aligned residues
89  int GetPSSMLength() const; // number of residues in master, from first to last aligned residue
90 
91  int GetNumBlocks() const; // return number of blocks in alignment (0 if no alignment)
92  bool GetCDBlockLengths(vector<int>& lengths) const;
93  bool GetBlockStartsForRow(int rowIndex, vector<int>& starts) const;
94 
95  /* Find/convert sequence list and row indices */
96  int GetSeqIndexForRowIndex(int rowIndex) const; // map alignment row to sequence index (-1 if invalid row)
97  int GetMasterSeqIndex() const; // get sequence index of the master sequence (-1 if fails)
98  int GetSeqIndex(const CRef<CSeq_id>& SeqID) const; // map seqId to the first possible sequence list index (-1 if fails)
99  int GetNthMatchFor(CRef<CSeq_id>& ID, int N); // get RowIndex of Nth match
100 
101  /* find all row indices for a seqID (return # found) */
102  int GetAllRowIndicesForSeqId(const CRef<CSeq_id>& SeqID, list<int>& rows) const;
103  int GetAllRowIndicesForSeqId(const CRef<CSeq_id>& SeqID, vector<int>& rows) const;
104 
105 
106  /* Access CD info via alignment row number */
107  bool GetGI(int Row, TGi& GI, bool ignorePDBs = true); // get GI of Row (if ignorePDBs = true, don't look @PDBs for the GI)
108  bool GetPDB(int Row, const CPDB_seq_id*& pPDB); // get PDB ID of Row
109  int GetLowerBound(int Row) const; // get Row lower alignment bound; return INVALID_MAPPED_POSITION on failure
110  int GetUpperBound(int Row) const; // get Row upper alignment bound; return INVALID_MAPPED_POSITION on failure
111  bool Get_GI_or_PDB_String_FromAlignment(int RowIndex, std::string& Str, bool Pad, int Len) const ;
112 
113  string GetSpeciesForRow(int Row); // find the species string for alignment row
114  string GetSequenceStringByRow(int rowId); // return the full sequence for rowId
115  bool GetSeqEntryForRow(int rowId, CRef< CSeq_entry >& seqEntry) const; // get the indicated seq_entry
116  bool GetBioseqForRow(int rowId, CRef< CBioseq >& bioseq);
117 
118 
119  /* Access CD info via sequence list index */
120  TGi GetGIFromSequenceList(int SeqIndex) const; // get GI from sequence list
121  string GetDefline(int SeqIndex) const; // get description from sequence list
122 
123  string GetSequenceStringByIndex(int SeqIndex); // return the full sequence for index SeqIndex
124  string GetSpeciesForIndex(int SeqIndex); // get species name from sequence list
125  bool GetSeqEntryForIndex(int seqIndex, CRef< CSeq_entry > & seqEntry) const; // was cdGetSeq from algMerge
126  bool GetBioseqForIndex(int seqIndex, CRef< CBioseq >& bioseq) ;
127 
128  // Obtain a copy of the first bioseq found that matches the ID passed in.
129  // Returns true if this is possible; false otherwise.
130  bool CopyBioseqForSeqId(const CRef< CSeq_id>& seqId, CRef< CBioseq >& bioseq) const;
131 
132  // Recursively look for a bioseq with the given seqid in the sequence list; return the first instance found.
133  bool GetBioseqWithSeqId(const CRef< CSeq_id>& seqid, const CBioseq*& bioseq) const;
134 
135  /* Examine alignment for a SeqId or footprint */
136  bool HasSeqId(const CRef<CSeq_id>& ID) const; // see if ID matches any ID in alignment (deprecate???)
137  bool HasSeqId(const CRef<CSeq_id>& ID, int& RowIndex) const; // same, but return row that matches
138 
139  /* SeqID getters ... from alignment info */
140  bool GetSeqIDForRow(int Pair, int DenDiagRow, CRef<CSeq_id>& SeqID) const; // get SeqID from alignment
141  bool GetSeqIDFromAlignment(int RowIndex, CRef<CSeq_id>& SeqID) const;
142 
143  /* SeqID getters ... from sequence list */
144  // CAUTION: the first method here may not give you the CSeq_id you expect/want.
145  // when there are multiple CSeq_ids for the specified index, priority is
146  // given to the PDB-type identifier, then to a GI, and then to 'other'.
147  // If any other type is present, this method returns false and an empty CRef.
148  bool GetSeqIDForIndex(int SeqIndex, CRef<CSeq_id>& SeqID) const; // get SeqID from sequence list
149  bool GetSeqIDs(int SeqIndex, list< CRef< CSeq_id > >& SeqIDs); // get all SeqIDs from sequence list
150  const list< CRef< CSeq_id > >& GetSeqIDs(int SeqIndex) const; // get all SeqIDs from sequence list
151 
152  /* Sequence or row removal */
153  bool EraseOtherRows(const std::vector<int>& KeepRows); // erase all rows from alignment not in KeepRows
154  bool EraseTheseRows(const std::vector<int>& TossRows); // erase all rows from alignment in TossRows
155  void EraseSequence(int SeqIndex); // erase a sequence from the set of seqs
156  void EraseSequences(); // erase sequences not in alignment
157  void ErasePendingRows(set<int>& rows);
158  void ErasePendingRow(int row);
159 
160  /* Methods for structures, structure alignments, MMDB identifiers */
161  bool Has3DMaster() const;
162  int Num3DAlignments() const;
163  bool GetRowsForMmdbId(int mmdbId, list<int>& rows) const; // find all rows with this mmdbId
164  bool GetRowsWithMmdbId(vector<int>& rows) const; // find all rows with a mmdbid
165  bool GetMmdbId(int SeqIndex, int& id) const; // get mmdb-id from sequence list
166 
167 
168  // If the master is a structure, fill in the master3d field with its PDB SeqId.
169  // Return true if the field is populated at exit (whether or not it was correctly set
170  // to begin with), or false if master is not a structure or otherwise failed.
171  // If checkRow1WhenConsensusMaster is true and the master is a consensus sequence,
172  // then synchronize master3d based on row 1, as above; otherwise, master3d is always emptied.
173  // *** NOTE: this method *always* resets master3d first. So, when false is returned,
174  // master3d will be empty.
175  bool SynchronizeMaster3D(bool checkRow1WhenConsensusMaster = true);
176 
177 /* CD alignment methods */
178 
179  // Returns coordinate on 'otherRow' that is mapped to 'thisPos' on 'thisRow'.
180  // Returns INVALID_MAPPED_POSITION on failure.
181  int MapPositionToOtherRow(int thisRow, int thisPos, int otherRow) const;
182 
183  // Formerly GetSeqPosition(...). Returns INVALID_MAPPED_POSITION on failure.
184  int MapPositionToOtherRow(const CRef< CSeq_align >& seqAlign, int thisPos, CoordMapDir mapDir) const;
185 
186  bool IsSeqAligns() const; // true if CD has alignment
187  bool GetAlignment(CRef< CSeq_annot >& seqAnnot); // return the first Seq_annot of type 'align'
188  const CRef< CSeq_annot >& GetAlignment() const; // return the first seq_annot of type 'align'
189 
190  const list< CRef< CSeq_align > >& GetSeqAligns() const; // get the list of Seq-aligns in alignment
191  list< CRef< CSeq_align > >& GetSeqAligns(); // get the list of Seq-aligns in alignment (editable)
192  bool GetSeqAlign(int Row, CRef< CSeq_align >& seqAlign); // get the Rowth Seq-align (editable)
193  const CRef< CSeq_align >& GetSeqAlign(int Row) const; // get the Rowth Seq-align
194  //int FindDDBySeqId(CRef<CSeq_id>& SeqID, TDendiag* & ResultDD, TDendiag* pNeedOverlapDD, int isSelf,int istart);
195 
196  bool UsesConsensusSequenceAsMaster() const;
197  bool HasConsensusSequence() const;
198  int GetRowsWithConsensus(vector<int>& consensusRows) const;
199  bool FindConsensusInSequenceList(vector<int>* indices = NULL) const;
200 
201  size_t GetNumPending() const {return(GetPending().size());}
202 
203  //add aligns or sequences to CD
204 
205  bool AddSeqAlign(CRef< CSeq_align > seqAlign);
206  bool AddPendingSeqAlign(CRef< CSeq_align > seqAlign);
207  bool AddSequence(CRef< CSeq_entry > seqAntry);
208  void Clear();
209 
210  /* Comment methods (there can be multiple comments) */
211  void SetComment(CCdd_descr::TComment oldComment, CCdd_descr::TComment newComment);
212 
213  /* CD annotations */
214  // These add a specific type of Cdd-descr to the CD.
215  // Typically, duplicates will not be added; functions return
216  // 'false' when attempting to add a duplicate description.
217  bool AddComment(const string& comment);
218  bool AddOthername(const string& othername);
219  bool AddTitle(const string& title);
220  bool AddPmidReference(TEntrezId pmid);
221  bool AddSource(const string& source, bool removeExistingSources = true);
222  bool AddCreateDate(); // uses the current time
223 
224  // Return the first title in the list of CCdd_descrs; by convention there should
225  // be at most one. If there is no title, an empty string is returned.
226  string GetTitle() const;
227 
228  // Return all 'title' strings found in the list of CCdd_descrs.
229  // Return value is the number of such strings returned.
230  unsigned int GetTitles(vector<string>& titles) const;
231 
232  // Removes any CCdd_descr of the specified choice type.
233  bool RemoveCddDescrsOfType(int cddDescrChoice);
234 
235 
236  /* Alignment & structure annotation methods */
237  bool AllResiduesInRangeAligned(int rowId, int from, int to) const;
238  bool AlignAnnotsValid(string* err = NULL) const; // one of the checks for re-mastering/validation
239  int GetNumAlignmentAnnotations();
240  string GetAlignmentAnnotationDescription(int Index);
241 
242  bool HasParentType(EClassicalOrComponent parentType) const;
243  bool HasParentType(CDomain_parent::EParent_type parentType) const;
244  bool GetClassicalParentId(const CCdd_id*& parentId) const; // get id of classical parent
245  string GetClassicalParentAccession(int& Version) const; // get accession and version of parent
246  string GetClassicalParentAccession() const;
247 
248 protected:
249 
250  // Return true only if 'descr' was added.
251  bool AddCddDescr(CRef< CCdd_descr >& descr);
252 
253 private:
254 
255  static bool GetBioseqWithSeqid(const CRef< CSeq_id>& seqid, const list< CRef< CSeq_entry > >& bsset, const CBioseq*& bioseq);
256 
257  // Prohibit copy constructor and assignment operator
260 };
261 
262 /////////////////// end of CCd inline methods
263 
264 
265 END_SCOPE(cd_utils)
267 
268 
269 #endif // CU_CDCORE_HPP
CCdCore(const CCdCore &value)
AlignmentUsage
Definition: cuCdCore.hpp:62
@ USE_PENDING_ALIGNMENT
Definition: cuCdCore.hpp:64
size_t GetNumPending() const
Definition: cuCdCore.hpp:201
AlignmentSrc
Definition: cuCdCore.hpp:58
CCdCore & operator=(const CCdCore &value)
CCdd_id –.
Definition: Cdd_id.hpp:66
Definition: Cdd.hpp:51
#define Len
int GetLowerBound(const CRef< CSeq_align > &seqAlign, bool onMaster)
Definition: cuAlign.cpp:258
int GetUpperBound(const CRef< CSeq_align > &seqAlign, bool onMaster)
Definition: cuAlign.cpp:273
USING_SCOPE(objects)
const int PENDING_ROW_START
Definition: cuCdCore.hpp:53
EClassicalOrComponent
Definition: cuCdCore.hpp:48
@ eClassicalParent
Definition: cuCdCore.hpp:49
@ eComponentParent
Definition: cuCdCore.hpp:50
CoordMapDir
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
string
Definition: cgiapp.hpp:690
#define NULL
Definition: ncbistd.hpp:225
NCBI_XOBJUTIL_EXPORT string GetTitle(const CBioseq_Handle &hnd, TGetTitleFlags flags=0)
Definition: seqtitle.cpp:106
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_CDUTILS_EXPORT
Definition: ncbi_export.h:376
const TPending & GetPending(void) const
Get the Pending member data.
Definition: Cdd_.hpp:1478
CRef< CDate_std > GetUpdateDate(const char *ptr, Parser::ESource source)
Definition: indx_blk.cpp:610
void AddComment(CSeq_feat &feat, const string &comment)
Definition: utils.cpp:44
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const CharType(& source)[N]
Definition: pointer.h:1149
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
#define row(bind, expected)
Definition: string_bind.c:73
static CRef< CSeqdesc > AddSource(CRef< CSeq_entry > entry, string taxname)
static void AddTitle(CRef< CSeq_entry > entry, string defline)
#define N
Definition: crc32.c:57
Modified on Fri Sep 20 14:57:44 2024 by modify_doxy.py rev. 669887