NCBI C++ ToolKit
biosample_util.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: biosample_util.hpp 89243 2020-03-11 15:14:32Z ludwigf $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Colleen Bollin
27  *
28  * File Description:
29  * check biosource and structured comment descriptors against biosample database
30  *
31  */
32 
33 #ifndef BIOSAMPLE_CHK__UTIL__HPP
34 #define BIOSAMPLE_CHK__UTIL__HPP
35 
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbistr.hpp>
38 
40 #include <objects/seq/Seqdesc.hpp>
44 
45 #include <objmgr/bioseq_handle.hpp>
46 
47 
50 BEGIN_SCOPE(biosample_util)
51 
52 typedef map< string, CRef< CSeq_descr > > TBioSamples;
53 typedef map< string, CRef< CSeq_descr > >::iterator TBioSamplesIterator;
54 
55 CRef< CSeq_descr > GetBiosampleData(const string& accession, bool use_dev_server = false, TBioSamples *cache = NULL);
56 
57 enum EStatus {
65 };
66 
69 typedef pair<string, biosample_util::EStatus> TStatus;
70 EStatus GetBiosampleStatus(const string& accession, bool use_dev_server = false, TStatuses *cache = NULL);
71 void GetBiosampleStatus(TStatuses& status, bool use_dev_server = false);
72 string GetBiosampleStatusName(EStatus status);
73 
74 
75 vector<string> GetBiosampleIDs(CBioseq_Handle bh);
76 vector<string> GetBioProjectIDs(CBioseq_Handle bh);
77 
78 
80 {
81 public:
83  CBiosampleFieldDiff(const string& sequence_id, const string& biosample_id, const string& field_name, const string& src_val, const string& sample_val) :
84  m_SequenceID(sequence_id), m_BiosampleID(biosample_id), m_FieldName(field_name), m_SrcVal(src_val), m_SampleVal(sample_val)
85  {};
86  CBiosampleFieldDiff(const string& sequence_id, const string& biosample_id, const CFieldDiff& diff) :
87  m_SequenceID(sequence_id), m_BiosampleID(biosample_id),
88  m_FieldName(diff.GetFieldName()),
89  m_SrcVal(diff.GetSrcVal()),
91  {};
92 
94 
95  static void PrintHeader(ncbi::CNcbiOstream & stream, bool show_seq_id = true);
96  void Print(ncbi::CNcbiOstream & stream, bool show_seq_id = true) const;
97  void Print(ncbi::CNcbiOstream & stream, const CBiosampleFieldDiff& prev);
98  void PrettyPrint(ncbi::CNcbiOstream & stream, size_t keyWidth=20, size_t valueWidth=40) const;
99  const string& GetSequenceId() const { return m_SequenceID; };
100  void SetSequenceId(const string& id) { m_SequenceID = id; };
101  const string& GetFieldName() const { return m_FieldName; };
102  string GetSrcVal() const { return CBioSource::IsStopWord(m_SrcVal) ? string("") : m_SrcVal; }; // source value, or "" when CBioSource::IsStopWord() accepts it
103  string GetSampleVal() const { return CBioSource::IsStopWord(m_SampleVal) ? string("") : m_SampleVal; }; // sample value, or "" when CBioSource::IsStopWord() accepts it
104  string GetPureSrcVal() const { return m_SrcVal; }; // stored source value as-is (no stop-word filtering, unlike GetSrcVal)
105  string GetPureSampleVal() const { return m_SampleVal; }; // stored sample value as-is (no stop-word filtering, unlike GetSampleVal)
106  const string& GetBioSample() const { return m_BiosampleID; };
107 
109  int Compare(const CBiosampleFieldDiff& other);
110 
111 private:
112  string m_SequenceID;
114  string m_FieldName;
115  string m_SrcVal;
116  string m_SampleVal;
117 };
118 
119 typedef vector< CRef<CBiosampleFieldDiff> > TBiosampleFieldDiffList;
120 
123  const string& biosample_accession,
124  size_t& num_processed,
125  vector<string>& unprocessed_ids,
126  bool use_dev_server = false,
127  bool compare_structured_comments = false,
128  const string& expected_prefix = "",
129  TBioSamples *cache = NULL);
130 
131 TBiosampleFieldDiffList GetFieldDiffs(const string& sequence_id, const string& biosample_id, const CBioSource& src, const CBioSource& sample);
132 TBiosampleFieldDiffList GetFieldDiffs(const string& sequence_id, const string& biosample_id, CConstRef<CUser_object> src, CConstRef<CUser_object> sample);
133 
134 bool ResolveSuppliedBioSampleAccession(const string& biosample_accession, vector<string>& biosample_ids);
135 
137 
138 // This function generates a table of biosample values for a bioseq
139 // that does not currently have a biosample ID.
140 void AddBioseqToTable(CBioseq_Handle bh, CSeq_table& table, bool with_id,
141  bool include_comments = false, const string& expected_prefix = "");
142 
143 
145 bool AttributeNamesAreEquivalent(string name1, string name2);
146 
148  const string& id_prefix,
149  CNcbiOstream* report_stream,
150  const string& bioproject_accession,
151  const string& default_owner,
152  const string& hup_date,
153  const string& comment,
154  bool first_seq_only,
155  bool report_structured_comments,
156  const string& expected_prefix);
157 
158 string OwnerFromAffil(const CAffil& affil);
159 
160 // rw-905 >>
161 void
163  const CSeq_descr& bioSample, // as retrieved from /biosample/fetch
164  const CBioSource& bioSource, // as plugged from a bioseq or seq-entry
165  TBiosampleFieldDiffList& diffs); // where to put list of "relevant" differences
166 
167 bool
169  const string& bioSampleAcc, // as retrieved from /biosample/fetch
170  const CBioSource& bioSource, // as plugged from a bioseq or seq-entry
171  CBioSource& bioSampleSource, // assigned from biosample, if there are relevant diffs
172  TBiosampleFieldDiffList& diffs); // where to put list of "relevant" diffs
173 // << rw-905
174 
175 bool
177  const CBioSource& existingBioSource,
178  CBioSource& newBioource);
179 
180 bool
182  const TBiosampleFieldDiffList& diffs,
183  const CBioSource& existingBioSource,
184  CBioSource& newBioource);
185 
186 void PrettyPrint(
187  const TBiosampleFieldDiffList& diffList,
188  CNcbiOstream& ostr,
189  size_t keyWidth = 20,
190  size_t valueWidth = 40);
191 
192 END_SCOPE(biosample_util)
195 
196 #endif //BIOSAMPLE_CHK__UTIL__HPP
User-defined methods of the data storage class.
User-defined methods of the data storage class.
string OwnerFromAffil(const CAffil &affil)
vector< CRef< CBiosampleFieldDiff > > TBiosampleFieldDiffList
bool ResolveSuppliedBioSampleAccession(const string &biosample_accession, vector< string > &biosample_ids)
vector< string > GetBioProjectIDs(CBioseq_Handle bh)
map< string, CRef< CSeq_descr > >::iterator TBioSamplesIterator
bool DoDiffsContainConflicts(const TBiosampleFieldDiffList &diffs, CNcbiOstream *log)
string GetBestBioseqLabel(CBioseq_Handle bsh)
EStatus GetBiosampleStatus(const string &accession, bool use_dev_server=false, TStatuses *cache=NULL)
bool UpdateBiosourceFromBiosample(const CBioSource &existingBioSource, CBioSource &newBioource)
EStatus
@ eStatus_ToBeCurated
@ eStatus_Hup
@ eStatus_Replaced
@ eStatus_Suppressed
@ eStatus_Live
@ eStatus_Unknown
@ eStatus_Withdrawn
map< string, EStatus > TStatuses
void PrintBioseqXML(CBioseq_Handle bh, const string &id_prefix, CNcbiOstream *report_stream, const string &bioproject_accession, const string &default_owner, const string &hup_date, const string &comment, bool first_seq_only, bool report_structured_comments, const string &expected_prefix)
CRef< CSeq_descr > GetBiosampleData(const string &accession, bool use_dev_server=false, TBioSamples *cache=NULL)
string GetBiosampleStatusName(EStatus status)
TBiosampleFieldDiffList GetBioseqDiffs(CBioseq_Handle bh, const string &biosample_accession, size_t &num_processed, vector< string > &unprocessed_ids, bool use_dev_server=false, bool compare_structured_comments=false, const string &expected_prefix="", TBioSamples *cache=NULL)
void GenerateDiffListFromBioSource(const CSeq_descr &bioSample, const CBioSource &bioSource, TBiosampleFieldDiffList &diffs)
bool AttributeNamesAreEquivalent(string name1, string name2)
pair< string, biosample_util::EStatus > TStatus
vector< string > GetBiosampleIDs(CBioseq_Handle bh)
map< string, EStatus >::iterator TStatusesIterator
void AddBioseqToTable(CBioseq_Handle bh, CSeq_table &table, bool with_id, bool include_comments=false, const string &expected_prefix="")
void PrettyPrint(const TBiosampleFieldDiffList &diffList, CNcbiOstream &ostr, size_t keyWidth=20, size_t valueWidth=40)
TBiosampleFieldDiffList GetFieldDiffs(const string &sequence_id, const string &biosample_id, const CBioSource &src, const CBioSource &sample)
#define false
Definition: bool.h:36
@Affil.hpp User-defined methods of the data storage class.
Definition: Affil.hpp:56
static bool IsStopWord(const string &value)
Definition: BioSource.cpp:1382
string GetSampleVal() const
CBiosampleFieldDiff(const string &sequence_id, const string &biosample_id, const CFieldDiff &diff)
int Compare(const CBiosampleFieldDiff &other)
static void PrintHeader(ncbi::CNcbiOstream &stream, bool show_seq_id=true)
void SetSequenceId(const string &id)
void PrettyPrint(ncbi::CNcbiOstream &stream, size_t keyWidth=20, size_t valueWidth=40) const
void Print(ncbi::CNcbiOstream &stream, bool show_seq_id=true) const
string GetPureSrcVal() const
const string & GetBioSample() const
string GetPureSampleVal() const
const string & GetSequenceId() const
string GetSrcVal() const
CBiosampleFieldDiff(const string &sequence_id, const string &biosample_id, const string &field_name, const string &src_val, const string &sample_val)
int CompareAllButSequenceID(const CBiosampleFieldDiff &other)
const string & GetFieldName() const
CBioseq_Handle –.
CObject –.
Definition: ncbiobj.hpp:180
CRef –.
Definition: ncbiobj.hpp:618
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
container_type::iterator iterator
Definition: map.hpp:54
Definition: map.hpp:338
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
#define const
Definition: zconf.h:230
Modified on Thu Dec 07 10:10:09 2023 by modify_doxy.py rev. 669887