NCBI C++ ToolKit
Functions | Variables
biosample_util.cpp File Reference
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <corelib/ncbistr.hpp>
#include <serial/enumvalues.hpp>
#include <serial/serialimpl.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqfeat/SeqFeatData.hpp>
#include <objects/seqfeat/Gb_qual.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqset/Bioseq_set.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/misc/sequence_macros.hpp>
#include <objects/seqtable/SeqTable_multi_data.hpp>
#include <objects/seqtable/SeqTable_column_info.hpp>
#include <objects/valid/Comment_rule.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/util/sequence.hpp>
#include <objmgr/bioseq_ci.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/object_manager.hpp>
#include <objtools/edit/dblink_field.hpp>
#include <algorithm>
#include <vector>
#include <list>
#include <map>
#include <objects/seq/Seq_descr.hpp>
#include <connect/ncbi_conn_stream.hpp>
#include <serial/objistrasn.hpp>
#include <misc/xmlwrapp/xmlwrapp.hpp>
#include <misc/biosample_util/biosample_util.hpp>
#include <misc/biosample_util/struc_table_column.hpp>
#include <objects/general/User_object.hpp>
#include <objects/general/User_field.hpp>
#include <objects/general/Object_id.hpp>
#include <serial/objistr.hpp>
+ Include dependency graph for biosample_util.cpp:

Go to the source code of this file.

Go to the SVN repository for this file.

Functions

string PrepareUrl (bool use_dev_server, const string &args)
 
CRef< CSeq_descr > GetBiosampleData (const string &accession, bool use_dev_server, TBioSamples *cache)
 
EStatus GetBioSampleStatusFromNode (const node &item)
 
TStatus ProcessBiosampleStatusNode (node &item)
 
EStatus GetBiosampleStatus (const string &accession, bool use_dev_server, TStatuses *cache)
 
void ProcessBulkBioSample (TStatuses &status, string list, bool use_dev_server)
 
void GetBiosampleStatus (TStatuses &status, bool use_dev_server)
 
string GetBiosampleStatusName (EStatus status)
 
vector< string > GetDBLinkIDs (const CUser_object &user, const string &field_name)
 
vector< string > GetDBLinkIDs (const CSeqdesc &seqdesc, const string &field)
 
vector< string > GetBiosampleIDs (CBioseq_Handle bh)
 
vector< string > GetBioProjectIDs (CBioseq_Handle bh)
 
void sPrintField (const string &value, size_t width, CNcbiOstream &ostr)
 
bool s_CompareStructuredCommentFields (CRef< CStructuredCommentTableColumnBase > f1, CRef< CStructuredCommentTableColumnBase > f2)
 
TStructuredCommentTableColumnList GetAvailableFields (vector< CConstRef< CUser_object > > src)
 
TBiosampleFieldDiffList GetFieldDiffs (const string &sequence_id, const string &biosample_id, const CBioSource &src, const CBioSource &sample)
 
bool s_ShouldIgnoreStructuredCommentFieldDiff (const string &label, const string &src_val, const string &sample_val)
 
TBiosampleFieldDiffList GetFieldDiffs (const string &sequence_id, const string &biosample_id, const CUser_object &src, const CUser_object &sample)
 
TBiosampleFieldDiffList GetFieldDiffs (const string &sequence_id, const string &biosample_id, CConstRef< CUser_object > src, CConstRef< CUser_object > sample)
 
bool DoDiffsContainConflicts (const TBiosampleFieldDiffList &diffs, CNcbiOstream *log)
 
bool s_IsReportableStructuredComment (const CSeqdesc &desc, const string &expected_prefix)
 
CRef< CSeqTable_column > FindSeqTableColumnByName (CRef< CSeq_table > values_table, string column_name)
 
void AddValueToColumn (CRef< CSeqTable_column > column, string value, size_t row)
 
void AddValueToTable (CSeq_table &table, string column_name, string value, size_t row)
 
string GetValueFromColumn (const CSeqTable_column &column, size_t row)
 
string GetValueFromTable (const CSeq_table &table, string column_name, size_t row)
 
static bool s_IsCitSub (const CSeqdesc &desc)
 
void AddBioseqToTable (CBioseq_Handle bh, CSeq_table &table, bool with_id, bool include_comments, const string &expected_prefix)
 
void HarmonizeAttributeName (string &attribute_name)
 
bool AttributeNamesAreEquivalent (string name1, string name2)
 
bool ResolveSuppliedBioSampleAccession (const string &biosample_accession, vector< string > &biosample_ids)
 
string GetBestBioseqLabel (CBioseq_Handle bsh)
 
TBiosampleFieldDiffList GetBioseqDiffs (CBioseq_Handle bh, const string &biosample_accession, size_t &num_processed, vector< string > &unprocessed_ids, bool use_dev_server, bool compare_structured_comments, const string &expected_prefix, TBioSamples *cache)
 
void AddContact (node::iterator &organization, CConstRef< CAuth_list > auth_list)
 
void s_AddSamplePair (node &sample_attrs, string attribute_name, string val)
 
void AddBioSourceToAttributes (node &organism, node &sample_attrs, const CBioSource &src)
 
void AddStructuredCommentToAttributes (node &sample_attrs, const CUser_object &usr)
 
string OwnerFromAffil (const CAffil &affil)
 
void PrintBioseqXML (CBioseq_Handle bh, const string &id_prefix, CNcbiOstream *report_stream, const string &bioproject_accession, const string &default_owner, const string &hup_date, const string &comment, bool first_seq_only, bool report_structured_comments, const string &expected_prefix)
 
void GenerateDiffListFromBioSource (const CSeq_descr &bioSample, const CBioSource &bioSource, TBiosampleFieldDiffList &diffs)
 
void SaveSerialObject (const string &filename, const CSerialObject &object)
 
bool GenerateDiffListFromBioSource (const string &existingBiosampleAcc, const CBioSource &newBioSource, CBioSource &proposedNewBiosource, TBiosampleFieldDiffList &diffs)
 
bool UpdateBiosourceFromBiosample (const CBioSource &existingBiosource, CBioSource &newBiosource)
 
CConstRef< CBiosampleFieldDiff > sGetDiffByFieldName (const TBiosampleFieldDiffList &diffs, const string &fieldName)
 
bool UpdateBiosourceFromBiosample (const TBiosampleFieldDiffList &diffs, const CBioSource &existingBiosource, CBioSource &newBiosource)
 
void PrettyPrint (const TBiosampleFieldDiffList &diffList, CNcbiOstream &ostr, size_t keyWidth, size_t valueWidth)
 

Variables

static const char * kSequenceID = "Sequence ID"
 
static const char * kAffilInst = "Institution"
 
static const char * kAffilDept = "Department"
 
static const char * kBioProject = "BioProject"
 
static const char * kStructuredCommentPrefix = "StructuredCommentPrefix"
 
static const char * kStructuredCommentSuffix = "StructuredCommentSuffix"
 

Function Documentation

◆ AddBioseqToTable()

void AddBioseqToTable ( CBioseq_Handle  bh,
CSeq_table &  table,
bool  with_id,
bool  include_comments,
const string &  expected_prefix
)

◆ AddBioSourceToAttributes()

void AddBioSourceToAttributes ( node &  organism,
node &  sample_attrs,
const CBioSource &  src
)

◆ AddContact()

void AddContact ( node::iterator &  organization,
CConstRef< CAuth_list >  auth_list
)

◆ AddStructuredCommentToAttributes()

void AddStructuredCommentToAttributes ( node &  sample_attrs,
const CUser_object &  usr
)

◆ AddValueToColumn()

void AddValueToColumn ( CRef< CSeqTable_column >  column,
string  value,
size_t  row
)

Definition at line 674 of file biosample_util.cpp.

References column, row, and rapidjson::value.

Referenced by AddValueToTable().

◆ AddValueToTable()

void AddValueToTable ( CSeq_table &  table,
string  column_name,
string  value,
size_t  row
)

◆ AttributeNamesAreEquivalent()

bool AttributeNamesAreEquivalent ( string  name1,
string  name2 
)

Definition at line 847 of file biosample_util.cpp.

References NStr::EqualNocase(), and HarmonizeAttributeName().

Referenced by AddStructuredCommentToAttributes().

◆ DoDiffsContainConflicts()

bool DoDiffsContainConflicts ( const TBiosampleFieldDiffList &  diffs,
CNcbiOstream *  log
)

Definition at line 606 of file biosample_util.cpp.

References NStr::IsBlank(), ITERATE, and log.

Referenced by CBiosampleChkApp::ProcessBioseqHandle().

◆ FindSeqTableColumnByName()

CRef<CSeqTable_column> FindSeqTableColumnByName ( CRef< CSeq_table >  values_table,
string  column_name
)

◆ GenerateDiffListFromBioSource() [1/2]

void GenerateDiffListFromBioSource ( const CSeq_descr &  bioSample,
const CBioSource &  bioSource,
TBiosampleFieldDiffList &  diffs
)

◆ GenerateDiffListFromBioSource() [2/2]

bool GenerateDiffListFromBioSource ( const string &  existingBiosampleAcc,
const CBioSource &  newBioSource,
CBioSource &  proposedNewBiosource,
TBiosampleFieldDiffList &  diffs
)

◆ GetAvailableFields()

TStructuredCommentTableColumnList GetAvailableFields ( vector< CConstRef< CUser_object > >  src)

◆ GetBestBioseqLabel()

string GetBestBioseqLabel ( CBioseq_Handle  bsh)

◆ GetBioProjectIDs()

vector<string> GetBioProjectIDs ( CBioseq_Handle  bh)

Definition at line 343 of file biosample_util.cpp.

References CSeqdesc_Base::e_User, GetDBLinkIDs(), and ITERATE.

Referenced by AddBioseqToTable(), and PrintBioseqXML().

◆ GetBiosampleData()

CRef< CSeq_descr > GetBiosampleData ( const string &  accession,
bool  use_dev_server,
TBioSamples *  cache
)

◆ GetBiosampleIDs()

vector<string> GetBiosampleIDs ( CBioseq_Handle  bh)

◆ GetBiosampleStatus() [1/2]

EStatus GetBiosampleStatus ( const string &  accession,
bool  use_dev_server,
TStatuses *  cache
)

◆ GetBiosampleStatus() [2/2]

void GetBiosampleStatus ( TStatuses &  status,
bool  use_dev_server
)

◆ GetBioSampleStatusFromNode()

EStatus GetBioSampleStatusFromNode ( const node &  item)

◆ GetBiosampleStatusName()

string GetBiosampleStatusName ( EStatus  status)

◆ GetBioseqDiffs()

TBiosampleFieldDiffList GetBioseqDiffs ( CBioseq_Handle  bh,
const string &  biosample_accession,
size_t &  num_processed,
vector< string > &  unprocessed_ids,
bool  use_dev_server,
bool  compare_structured_comments,
const string &  expected_prefix,
TBioSamples *  cache
)

◆ GetDBLinkIDs() [1/2]

vector<string> GetDBLinkIDs ( const CSeqdesc &  seqdesc,
const string &  field
)

◆ GetDBLinkIDs() [2/2]

vector<string> GetDBLinkIDs ( const CUser_object &  user,
const string &  field_name
)

◆ GetFieldDiffs() [1/3]

TBiosampleFieldDiffList GetFieldDiffs ( const string &  sequence_id,
const string &  biosample_id,
CConstRef< CUser_object >  src,
CConstRef< CUser_object >  sample
)

◆ GetFieldDiffs() [2/3]

TBiosampleFieldDiffList GetFieldDiffs ( const string &  sequence_id,
const string &  biosample_id,
const CBioSource &  src,
const CBioSource &  sample
)

Definition at line 515 of file biosample_util.cpp.

References CBioSource::GetBiosampleDiffs(), and ITERATE.

Referenced by GenerateDiffListFromBioSource(), and GetBioseqDiffs().

◆ GetFieldDiffs() [3/3]

TBiosampleFieldDiffList GetFieldDiffs ( const string &  sequence_id,
const string &  biosample_id,
const CUser_object &  src,
const CUser_object &  sample
)

◆ GetValueFromColumn()

string GetValueFromColumn ( const CSeqTable_column &  column,
size_t  row
)

Definition at line 704 of file biosample_util.cpp.

References column, row, and val.

Referenced by GetValueFromTable().

◆ GetValueFromTable()

string GetValueFromTable ( const CSeq_table &  table,
string  column_name,
size_t  row
)

Definition at line 715 of file biosample_util.cpp.

References NStr::EqualNocase(), GetValueFromColumn(), ITERATE, row, table, and val.

◆ HarmonizeAttributeName()

void HarmonizeAttributeName ( string &  attribute_name)

Definition at line 839 of file biosample_util.cpp.

References NStr::ReplaceInPlace().

Referenced by AttributeNamesAreEquivalent().

◆ OwnerFromAffil()

string OwnerFromAffil ( const CAffil &  affil)

◆ PrepareUrl()

string PrepareUrl ( bool  use_dev_server,
const string &  args
)

Definition at line 84 of file biosample_util.cpp.

Referenced by GetBiosampleData(), GetBiosampleStatus(), and ProcessBulkBioSample().

◆ PrettyPrint()

void PrettyPrint ( const TBiosampleFieldDiffList &  diffList,
CNcbiOstream &  ostr,
size_t  keyWidth,
size_t  valueWidth
)

Definition at line 1604 of file biosample_util.cpp.

References sPrintField(), and string.

Referenced by PrintDiffList().

◆ PrintBioseqXML()

void PrintBioseqXML ( CBioseq_Handle  bh,
const string &  id_prefix,
CNcbiOstream *  report_stream,
const string &  bioproject_accession,
const string &  default_owner,
const string &  hup_date,
const string &  comment,
bool  first_seq_only,
bool  report_structured_comments,
const string &  expected_prefix
)

◆ ProcessBiosampleStatusNode()

TStatus ProcessBiosampleStatusNode ( node &  item)

◆ ProcessBulkBioSample()

void ProcessBulkBioSample ( TStatuses &  status,
string  list,
bool  use_dev_server
)

◆ ResolveSuppliedBioSampleAccession()

bool ResolveSuppliedBioSampleAccession ( const string &  biosample_accession,
vector< string > &  biosample_ids
)
◆ s_AddSamplePair()

void s_AddSamplePair ( node &  sample_attrs,
string  attribute_name,
string  val
)

◆ s_CompareStructuredCommentFields()

bool s_CompareStructuredCommentFields ( CRef< CStructuredCommentTableColumnBase >  f1,
CRef< CStructuredCommentTableColumnBase >  f2
)

Definition at line 466 of file biosample_util.cpp.

References NStr::Compare().

Referenced by GetAvailableFields().

◆ s_IsCitSub()

static bool s_IsCitSub ( const CSeqdesc &  desc)
static

◆ s_IsReportableStructuredComment()

bool s_IsReportableStructuredComment ( const CSeqdesc &  desc,
const string &  expected_prefix
)

◆ s_ShouldIgnoreStructuredCommentFieldDiff()

bool s_ShouldIgnoreStructuredCommentFieldDiff ( const string &  label,
const string &  src_val,
const string &  sample_val
)

Definition at line 530 of file biosample_util.cpp.

References NStr::Equal(), NStr::EqualNocase(), and label.

Referenced by GetFieldDiffs().

◆ SaveSerialObject()

void SaveSerialObject ( const string &  filename,
const CSerialObject &  object
)

Definition at line 1416 of file biosample_util.cpp.

Referenced by GenerateDiffListFromBioSource().

◆ sGetDiffByFieldName()

CConstRef<CBiosampleFieldDiff> sGetDiffByFieldName ( const TBiosampleFieldDiffList &  diffs,
const string &  fieldName
)

Definition at line 1531 of file biosample_util.cpp.

Referenced by UpdateBiosourceFromBiosample().

◆ sPrintField()

void sPrintField ( const string &  value,
size_t  width,
CNcbiOstream &  ostr
)

Definition at line 391 of file biosample_util.cpp.

References string, and rapidjson::value.

Referenced by PrettyPrint(), and CBiosampleFieldDiff::PrettyPrint().

◆ UpdateBiosourceFromBiosample() [1/2]

bool UpdateBiosourceFromBiosample ( const CBioSource &  existingBiosource,
CBioSource &  newBiosource
)

◆ UpdateBiosourceFromBiosample() [2/2]

bool UpdateBiosourceFromBiosample ( const TBiosampleFieldDiffList &  diffs,
const CBioSource &  existingBiosource,
CBioSource &  newBiosource
)

Variable Documentation

◆ kAffilDept

const char* kAffilDept = "Department"
static

Definition at line 746 of file biosample_util.cpp.

Referenced by AddBioseqToTable().

◆ kAffilInst

const char* kAffilInst = "Institution"
static

Definition at line 745 of file biosample_util.cpp.

Referenced by AddBioseqToTable().

◆ kBioProject

const char* kBioProject = "BioProject"
static

Definition at line 747 of file biosample_util.cpp.

Referenced by AddBioseqToTable().

◆ kSequenceID

const char* kSequenceID = "Sequence ID"
static

Definition at line 744 of file biosample_util.cpp.

Referenced by AddBioseqToTable().

◆ kStructuredCommentPrefix

const char* kStructuredCommentPrefix = "StructuredCommentPrefix"
static

Definition at line 1125 of file biosample_util.cpp.

Referenced by AddStructuredCommentToAttributes().

◆ kStructuredCommentSuffix

const char* kStructuredCommentSuffix = "StructuredCommentSuffix"
static

Definition at line 1126 of file biosample_util.cpp.

Referenced by AddStructuredCommentToAttributes().

Modified on Mon Jun 24 05:22:40 2024 by modify_doxy.py rev. 669887