NCBI C++ ToolKit
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuCD.hpp 95643 2021-12-03 16:24:20Z lanczyck $
2  * ===========================================================================
3  *
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Chris Lanczycki
27  *
28  * File Description:
29  *
30  * High-level algorithmic operations on one or more CCdCore objects.
31  * (methods that only traverse the cdd ASN.1 data structure are
32  * placed in the CCdCore class itself)
33  *
34  * ===========================================================================
35  */
37 #ifndef CU_CD_HPP
38 #define CU_CD_HPP
40 #include <corelib/ncbistd.hpp>
46 BEGIN_SCOPE(cd_utils) // namespace ncbi::objects::
48 //class CCdCore;
49 //class CNcbi_mime_asn1;
57 bool Reorder(CCdCore* pCD, const vector<int> positions);
59 // Structure alignments in the features are required to be listed in the order in
60 // which the PDB identifiers appear in the alignment.
61 // 'positions' contains the new order of all NON-MASTER rows, as in Reorder
63 bool ReorderStructureAlignments(CCdCore* pCD, const vector<int>& positions);
65 // Assumes cd has been remastered and the alignannot field still
66  // is indexed to the oldMasterRow's sequence. Does nothing if oldMasterRow = 0,
67  // or out of range, and returns false. Returns false if seqId of oldMasterRow
68  // does not match the seqId of every annotation in the alignannot, or if any
69  // mapping of from/to has an error.
71 bool remasterAlignannot(CCdCore& cd, unsigned int oldMasterRow = 1);
73 // Resets a variety of fields that need to be wiped out on remastering, or
74 // when removing a consensus.
76 void ResetFields(CCdCore* pCD);
78 // If ncbiMime contains a CD, return it. Otherwise return NULL.
82 // Remove consensus sequence from alignment and sequence list.
83 // If the master was a consensus sequence, remaster to the 2nd alignment row first.
84 //int PurgeConsensusSequences(CCdCore* pCD, bool resetFields = true);
86 // If copyNonASNMembers = true, all statistics, sequence strings and aligned residues
87 // data structures are copied. The CD's read status is *always* copied.
90 /* replaced CdAlignmentAdapter::CreateCD
91 CCdCore* CreateChildCD(const CCdCore* origCD, const vector<int> selectedRows, string newAccession, string shortName);
92 */
94 // Set creation date of CD w/ the current time. Removes existing creation date.
96 bool SetCreationDate(CCdCore* cd);
98 bool SetUpdateDate(CCdCore* cd);
99 // When only the first pointer is passed, checks for overlaps among that CD's rows.
100 // Otherwise, it reports on overlaps between two distinct CDs.
102 int NumberOfOverlappedRows(CCdCore* cd1, CCdCore* cd2 = NULL);
104 int GetOverlappedRows(CCdCore* cd1, CCdCore* cd2, vector<int>& rowsOfCD1, vector<int>& rowsOfCD2);
106 // For a specified row in cd1, find all potential matches in cd2.
107 // If cd1 == cd2, returns row1 plus any other matches (should be no such overlaps in a valid CD)
108 // If cd1AsChild == true, mapping assumes cd2 is parent of cd1.
109 // If cd1AsChild == false, mapping assumes cd1 is parent of cd2.
110 // In 'overlapMode', returns the row index of cd2 for *any* overlap with row1, not just
111 // those overlaps which obey the specified parent/child relationship between cd1 and cd2.
112 // Return number of rows found.
114 int GetMappedRowIds(CCdCore* cd1, int row1, CCdCore* cd2, vector<int>& rows2, bool cd1AsChild, bool overlapMode = false);
116 // return a vector containing (in order) the full sequence in NCBIeaa
117 // format for every Seq_entry in the sequence list
119 void SetConvertedSequencesForCD(CCdCore* cd, vector<string>& convertedSequences, bool forceRecompute = false);
121 // for each row, return a char* containing all residues aligned in the cd
123 void SetAlignedResiduesForCD(CCdCore* cd, char** & ppAlignedResidues, bool forceRecompute = false);
125 // Return strings containing the residues in each alignment column, in row order; pending rows are ignored.
126 // The index into the map is the zero-based position on the sequence corresponding to 'referenceRow'.
127 // If 'referenceRow' is not provided, or is out of range, the index will simply be the column number,
128 // starting from zero.
129 // Assumes that the CCdCore object has the same block model on each row. If that is not true,
130 // or other problems arise, 'columns' will be returned as an empty map.
132 void GetAlignmentColumnsForCD(CCdCore* cd, map<unsigned int, string>& columns, unsigned int referenceRow = kMax_UInt);
134 // Returns '<cd->GetAccession> (<cd->GetName>)' w/o the angle brackets;
135 // format used by the validator.
137 string GetVerboseNameStr(const CCdCore* cd);
139 // for getting the bioseq and seq-loc for rows of a CD.
145 CRef< CBioseq > GetBioseqWithFootprintForNthRow(CCdCore* cd, int N, string& errstr);
147 bool GetBioseqWithFootprintForNRows(CCdCore* cd, int N, vector< CRef< CBioseq > >& bioseqs, string& errstr);
149 // Sequences reporting no taxonomy info are ignored when finding the common tax node.
150 // Specify how to handle the case when *no* sequences in 'cd' have taxonomy info (e.g., all local sequences):
151 // useRootWhenNoTaxInfo = true means to return the root tax node
152 // useRootWhenNoTaxInfo = false means to return an empty CRef (i.e., report nothing)
154 CRef< COrg_ref > GetCommonTax(CCdCore* cd, bool useRootWhenNoTaxInfo = true);
159 // Remove consensus sequence from alignment and sequence list.
160 // If the master was a consensus sequence, remaster to the 2nd alignment row first.
162 int PurgeConsensusSequences(CCdCore* pCD, bool resetFields = true);
165 bool RemasterWithStructure(CCdCore* cd, string* msg = NULL);
168 bool ReMasterCdWithoutUnifiedBlocks(CCdCore* cd, int Row, bool resetFields = true);
170 // Return +ve (equal to # of blocks in IBM CD) if the block structure was modified successfully.
171 // Return 0 if no action taken.
172 // Return -ve if IBM was run and found no intersection or otherwise failed.
173 // 'rowFraction' specifies the minimum fraction of rows in the alignment that
174 // must have an aligned residue at a position for that position to be part of
175 // the intersected alignment. If 'rowFraction' <= 0 or > 1.0, rowFraction is
176 // reset to 1.0 (i.e., only columns with an aligned residue on all rows appear
177 // in the intersected alignment).
178 // NOTE: Only modifying the alignment data; no other coordinate-dependent data in
179 // 'ccd' are altered due to modification of alignment blocks caused by IBM.
181 int IntersectByMaster(CCdCore* ccd, double rowFraction = 1.0);
183 END_SCOPE(cd_utils)
187 #endif // CU_CD_HPP
User-defined methods of the data storage class.
CRef –.
Definition: ncbiobj.hpp:618
Definition: map.hpp:338
Include a standard set of the NCBI C++ Toolkit most basic headers.
bool SetCreationDate(CCdCore *cd)
Definition: cuCD.cpp:207
bool remasterAlignannot(CCdCore &cd, unsigned int oldMasterRow=1)
Definition: cuCD.cpp:803
void ResetFields(CCdCore *pCD)
Definition: cuCD.cpp:89
bool ReMasterCdWithoutUnifiedBlocks(CCdCore *cd, int Row, bool resetFields=true)
Definition: cuCD.cpp:757
CCdCore * CopyCD(const CCdCore *cd)
Definition: cuCD.cpp:281
bool obeysParentTypeConstraints(const CCdCore *pCD)
Definition: cuCD.cpp:701
int GetOverlappedRows(CCdCore *cd1, CCdCore *cd2, vector< int > &rowsOfCD1, vector< int > &rowsOfCD2)
Definition: cuCD.cpp:352
string GetVerboseNameStr(const CCdCore *cd)
Definition: cuCD.cpp:551
bool Reorder(CCdCore *pCD, const vector< int > positions)
Definition: cuCD.cpp:112
void GetAlignmentColumnsForCD(CCdCore *cd, map< unsigned int, string > &columns, unsigned int referenceRow=kMax_UInt)
Definition: cuCD.cpp:458
bool RemasterWithStructure(CCdCore *cd, string *msg=NULL)
Definition: cuCD.cpp:728
CCdCore * ExtractCDFromMime(CNcbi_mime_asn1 *ncbiMime)
Definition: cuCD.cpp:262
int IntersectByMaster(CCdCore *ccd, double rowFraction=1.0)
Definition: cuCD.cpp:906
CRef< CBioseq > GetMasterBioseqWithFootprintOld(CCdCore *cd)
Definition: cuCD.cpp:558
bool ReorderStructureAlignments(CCdCore *pCD, const vector< int > &positions)
Definition: cuCD.cpp:148
int NumberOfOverlappedRows(CCdCore *cd1, CCdCore *cd2=NULL)
Definition: cuCD.cpp:403
bool SetUpdateDate(CCdCore *cd)
Definition: cuCD.cpp:234
void SetAlignedResiduesForCD(CCdCore *cd, char **&ppAlignedResidues, bool forceRecompute=false)
Definition: cuCD.cpp:422
bool GetBioseqWithFootprintForNRows(CCdCore *cd, int N, vector< CRef< CBioseq > > &bioseqs, string &errstr)
Definition: cuCD.cpp:624
void SetConvertedSequencesForCD(CCdCore *cd, vector< string > &convertedSequences, bool forceRecompute=false)
Definition: cuCD.cpp:408
int PurgeConsensusSequences(CCdCore *pCD, bool resetFields=true)
Definition: cuCD.cpp:878
int GetMappedRowIds(CCdCore *cd1, int row1, CCdCore *cd2, vector< int > &rows2, bool cd1AsChild, bool overlapMode=false)
Definition: cuCD.cpp:301
Definition: cuCD.hpp:51
CRef< COrg_ref > GetCommonTax(CCdCore *cd, bool useRootWhenNoTaxInfo=true)
Definition: cuCD.cpp:639
CRef< CBioseq > GetBioseqWithFootprintForNthRow(CCdCore *cd, int N, string &errstr)
Definition: cuCD.cpp:590
CRef< CBioseq > GetMasterBioseqWithFootprint(CCdCore *cd)
Definition: cuCD.cpp:580
int GetReMasterFailureCode(const CCdCore *cd)
Definition: cuCD.cpp:73
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
static const column_t columns[]
Definition: utf8_2.c:22
#define NULL
Definition: ncbistd.hpp:225
#define kMax_UInt
Definition: ncbi_limits.h:185
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
Definition: ncbi_export.h:376
#define const
Definition: zconf.h:232
#define N
Definition: crc32.c:57
Modified on Sun Apr 21 03:43:27 2024 by rev. 669887