NCBI C++ ToolKit
biosample_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: biosample_util.cpp 101944 2024-03-06 20:49:56Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Colleen Bollin
27  *
28  * File Description:
29  * Implementation of utility classes and functions for biosample_chk.
30  *
31  */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <corelib/ncbistr.hpp>
35 
36 #include <serial/enumvalues.hpp>
37 #include <serial/serialimpl.hpp>
38 
44 #include <objects/seq/Bioseq.hpp>
49 #include <objmgr/scope.hpp>
50 #include <objmgr/seq_vector.hpp>
51 #include <objmgr/util/sequence.hpp>
52 #include <objmgr/bioseq_ci.hpp>
53 #include <objmgr/seqdesc_ci.hpp>
56 
57 #include <algorithm>
58 #include <vector>
59 #include <list>
60 #include <map>
61 
62 // for biosample fetching
65 #include <serial/objistrasn.hpp>
66 
70 
75 #include <serial/objistrasn.hpp>
76 #include <serial/objistr.hpp>
77 
80 BEGIN_SCOPE(biosample_util)
81 
82 using namespace xml;
83 
// Build the BioSample fetch URL for the selected server.
//
// use_dev_server  true selects the development host, false the production host.
// args            query string appended verbatim after '?' (no URL-escaping is
//                 applied here).
//
// Returns "https://<host>/biosample/fetch/?<args>".
string PrepareUrl(bool use_dev_server, const string &args)
{
    const char* host = use_dev_server ? "dev-api-int.ncbi.nlm.nih.gov"
                                      : "api-int.ncbi.nlm.nih.gov";
    string url("https://");
    url += host;
    url += "/biosample/fetch/";
    url += "?";
    url += args;
    return url;
}
91 
// Fetch the descriptor set for one BioSample accession from the BioSample
// server. (The return-type line of this definition is not present in this
// extract; the body returns a CRef<CSeq_descr>.)
//
// accession       BioSample accession to request.
// use_dev_server  forwarded to PrepareUrl to choose the host.
// cache           optional; looked up first and updated with the result.
//
// Returns the parsed descriptors, or a null reference when reading the
// ASN.1 text response throws.
93 GetBiosampleData(const string& accession, bool use_dev_server, TBioSamples *cache)
94 {
95  if (cache)
96  {
97  TBioSamplesIterator it = cache->find(accession);
98  if (it != cache->end())
99  {
100  return it->second;
101  }
102  }
103 
104  string args = "accession=" + accession + "&format=asn1raw";
105  CConn_HttpStream http_stream(PrepareUrl(use_dev_server, args));
106  unique_ptr<CObjectIStream> in_stream;
107  in_stream.reset(new CObjectIStreamAsn(http_stream));
108 
109  CRef< CSeq_descr > response(new CSeq_descr());
110  try {
111  *in_stream >> *response;
112  } catch (...) {
// any failure while reading is reported to the caller as a null reference
113  response.Reset(NULL);
114  }
// the result is stored in the cache even when it is null
115  if (cache)
116  {
117  (*cache)[accession] = response;
118  }
119 
120 #if 0
121  // show biosample server response for debugging
122  static unsigned int suffix(1);
123  if (response) {
124  string filename = "biosource.asn1." + NStr::IntToString(suffix++);
125  ofstream ostr(filename);
126  ostr << MSerial_Format_AsnText() << response;
127  ostr.close();
128  }
129 #endif
130 
131  return response;
132 }
133 
// Body of GetBioSampleStatusFromNode (name taken from its call sites in this
// file). NOTE(review): the signature line and the declaration of the
// attribute iterator `at` are missing from this extract.
//
// Looks for an attribute named "status" on `item` and maps its value,
// compared case-insensitively, to the matching EStatus enumerator.
// Returns eStatus_Unknown for an unrecognized value or when no "status"
// attribute is found.
135 {
137  while (at != item.get_attributes().end()) {
138  if (NStr::Equal(at->get_name(), "status")) {
139  string val = at->get_value();
140  if (NStr::EqualNocase(val, "live")) {
141  return eStatus_Live;
142  } else if (NStr::EqualNocase(val, "hup")) {
143  return eStatus_Hup;
144  } else if (NStr::EqualNocase(val, "withdrawn")) {
145  return eStatus_Withdrawn;
146  } else if (NStr::EqualNocase(val, "suppressed")) {
147  return eStatus_Suppressed;
148  } else if (NStr::EqualNocase(val, "to_be_curated")) {
149  return eStatus_ToBeCurated;
150  } else if (NStr::EqualNocase(val, "replaced")) {
151  return eStatus_Replaced;
152  } else {
153  return eStatus_Unknown;
154  }
// not reached: every branch of the chain above returns
155  break;
156  }
157  ++at;
158  }
159  return eStatus_Unknown;
160 }
161 
162 
// Body of ProcessBiosampleStatusNode (name taken from its call sites in this
// file). NOTE(review): the signature line and the declaration of the
// attribute iterator `at1` are missing from this extract.
//
// Builds a TStatus whose `first` is the value of the "accession" attribute of
// `item` and whose `second` is the status derived from the first child node
// named "Status".
164 {
165  TStatus response;
// stop at the first non-blank accession value
167  while (at1 != item.get_attributes().end() && NStr::IsBlank(response.first)) {
168  if (NStr::Equal(at1->get_name(), "accession")) {
169  response.first = at1->get_value();
170  }
171  ++at1;
172  }
173  node::iterator it = item.begin();
174  while (it != item.end()) {
175  if (NStr::Equal(it->get_name(), "Status")) {
176  response.second = GetBioSampleStatusFromNode(*it);
177  break;
178  }
179  ++it;
180  }
// if no "Status" child exists, response.second keeps whatever TStatus's
// default construction gave it
181  return response;
182 }
183 
184 EStatus GetBiosampleStatus(const string& accession, bool use_dev_server, TStatuses *cache)
185 {
186  if (cache)
187  {
188  TStatusesIterator it = cache->find(accession);
189  if (it != cache->end())
190  {
191  return it->second;
192  }
193  }
194 
195  EStatus status = eStatus_Unknown;
196  string args = "accession=" + accession;
197  CConn_HttpStream http_stream(PrepareUrl(use_dev_server, args));
198  xml::error_messages errors;
199  document response(http_stream, &errors);
200 
201 #if 0
202  TStatus status = ProcessBiosampleStatusNode(response.get_root_node());
203  return status.second;
204 #else
205  // get status from XML response
206  node & root = response.get_root_node();
207  node::iterator it = root.begin();
208  while (it != root.end())
209  {
210  if (NStr::Equal(it->get_name(), "Status"))
211  {
212  status = GetBioSampleStatusFromNode(*it);
213  break;
214  }
215  ++it;
216  }
217 #endif
218  if (cache)
219  {
220  (*cache)[accession] = status;
221  }
222  return status;
223 }
224 
225 void ProcessBulkBioSample(TStatuses& status, string list, bool use_dev_server)
226 {
227  string args = "id=" + list + "&bulk=true";
228  CConn_HttpStream http_stream(PrepareUrl(use_dev_server, args));
229  xml::error_messages errors;
230  document response(http_stream, &errors);
231 
232  // get status from XML response
233  node & root = response.get_root_node();
234  node::iterator it = root.begin();
235  while (it != root.end())
236  {
237  if (NStr::EqualNocase(it->get_name(), "BioSample")) {
238  TStatus response = ProcessBiosampleStatusNode(*it);
239  status[response.first] = response.second;
240  }
241  ++it;
242  }
243 }
244 
245 void GetBiosampleStatus(TStatuses& status, bool use_dev_server)
246 {
247  size_t count = 0;
248  string list = "";
249  for (TStatuses::iterator it = status.begin(); it != status.end(); ++it) {
250  list += "," + it->first;
251  count++;
252  if (count == 900) {
253  ProcessBulkBioSample(status, list.substr(1), use_dev_server);
254  list = "";
255  count = 0;
256  }
257  }
258  if (!NStr::IsBlank(list)) {
259  ProcessBulkBioSample(status, list.substr(1), use_dev_server);
260  }
261 }
262 
263 
// Body of a function that converts an EStatus value (`status`) to its display
// string. NOTE(review): the signature line is missing from this extract, so
// the function name and exact parameter type are not visible here.
//
// Returns kEmptyStr for a value that matches none of the listed enumerators.
265 {
266  switch (status) {
267  case eStatus_Unknown:
268  return "Unknown";
269  break;
270  case eStatus_Live:
271  return "Live";
272  break;
273  case eStatus_Hup:
274  return "HUP";
275  break;
276  case eStatus_Withdrawn:
277  return "Withdrawn";
278  break;
279  case eStatus_Suppressed:
280  return "Suppressed";
281  break;
282  case eStatus_ToBeCurated:
283  return "ToBeCurated";
284  break;
285  case eStatus_Replaced:
286  return "Replaced";
287  break;
288  }
289  return kEmptyStr;
290 }
291 
292 
// Collect the string values of one field of a DBLink user object.
//
// user        user object to inspect; anything whose type string is not
//             "DBLink" (case-insensitive) yields an empty result.
// field_name  name of the field to read.
//
// Returns the collected values; empty when the field lookup throws or the
// field does not hold strings.
// NOTE(review): the loop header that introduces `it` (listing line 304) is
// missing from this extract.
293 vector<string> GetDBLinkIDs(const CUser_object& user, const string& field_name)
294 {
295  vector<string> ids;
296 
297  if (!user.IsSetType() || !user.GetType().IsStr() || !NStr::EqualNocase(user.GetType().GetStr(), "DBLink")) {
298  // Not DBLink object
299  return ids;
300  }
301  try {
302  const CUser_field& field = user.GetField(field_name);
303  if (field.IsSetData() && field.GetData().IsStrs()) {
305  ids.push_back(*it);
306  }
307  }
308  } catch (...) {
309  // no biosample ID field
310  }
311 
312  return ids;
313 }
314 
315 
316 vector<string> GetDBLinkIDs(const CSeqdesc& seqdesc, const string& field)
317 {
318  vector<string> ids;
319 
320  if (seqdesc.IsUser()) {
321  ids = GetDBLinkIDs(seqdesc.GetUser(), field);
322  }
323  return ids;
324 }
325 
326 
327 vector<string> GetBiosampleIDs(CBioseq_Handle bh)
328 {
329  vector<string> ids;
330 
331  edit::CDBLinkField dblink_field(edit::CDBLinkField::eDBLinkFieldType_BioSample);
332  vector<CConstRef<CObject> > objs = dblink_field.GetObjects(bh);
333  ITERATE(vector<CConstRef<CObject> >, it, objs) {
334  vector<string> new_ids = dblink_field.GetVals(**it);
335  ITERATE(vector<string>, s, new_ids) {
336  ids.push_back(*s);
337  }
338  }
339  return ids;
340 }
341 
342 
// Body of GetBioProjectIDs (name taken from its call sites in this file).
// NOTE(review): the signature line is missing from this extract; `bh` is the
// bioseq handle being inspected.
//
// Walks every user-object descriptor reachable from `bh` and concatenates the
// "BioProject" DBLink values found on each.
344 {
345  vector<string> ids;
346 
347  CSeqdesc_CI desc_ci(bh, CSeqdesc::e_User);
348  while (desc_ci) {
349  vector<string> new_ids = GetDBLinkIDs(*desc_ci, "BioProject");
350  ITERATE(vector<string>, s, new_ids) {
351  ids.push_back(*s);
352  }
353  ++desc_ci;
354  }
355  return ids;
356 }
357 
358 
359 void CBiosampleFieldDiff::PrintHeader(ncbi::CNcbiOstream & stream, bool show_seq_id)
360 {
361  stream << "#sample\tattribute";
362  if (show_seq_id) {
363  stream << "\tSequenceID";
364  }
365  stream << "\told_value\tnew_value" << endl;
366 }
367 
368 
369 void CBiosampleFieldDiff::Print(CNcbiOstream& stream, bool show_seq_id) const
370 {
371  bool blank_sample = NStr::IsBlank(m_SampleVal);
372  bool blank_src = NStr::IsBlank(m_SrcVal);
373  if (blank_sample && blank_src) {
374  return;
375  }
376  bool use_geo_loc_name = CSubSource::NCBI_UseGeoLocNameForCountry();
377  stream << m_BiosampleID << "\t";
378  if (use_geo_loc_name && m_FieldName == "country") {
379  stream << "geo_loc_name" << "\t";
380  } else {
381  stream << m_FieldName << "\t";
382  }
383  if (show_seq_id) {
384  stream << m_SequenceID << "\t";
385  }
386  stream << (blank_sample ? "[[add]]" : m_SampleVal) << "\t";
387  stream << (blank_src ? "[[delete]]" : m_SrcVal) << endl;
388 }
389 
390 
391 void sPrintField(const string& value, size_t width, CNcbiOstream& ostr)
392 {
393  auto formattedValue = (value + string(width, ' ')).substr(0, width);
394  ostr << formattedValue;
395 }
396 
// Tail of a const member function that prints one difference as three
// fixed-width columns (attribute, old value, new value) followed by "\n".
// NOTE(review): the first line of the signature (return type and function
// name) is missing from this extract.
//
// ostr        destination stream.
// keyWidth    column width used for the attribute name.
// valueWidth  column width used for each of the two values.
398  CNcbiOstream& ostr,
399  size_t keyWidth,
400  size_t valueWidth) const
401 {
402  auto attribute = GetFieldName();
403  auto newValue = GetSampleVal();
404  auto oldValue = GetSrcVal();
405 
// only one side present: tag the populated side as an addition or a deletion
406  if (oldValue.empty() && !newValue.empty()) {
407  newValue = string("[[add]] ") + newValue;
408  }
409  if (!oldValue.empty() && newValue.empty()) {
410  oldValue = string("[[delete]] ") + oldValue;
411  }
412  sPrintField(attribute, keyWidth, ostr);
413  sPrintField(oldValue, valueWidth, ostr);
414  sPrintField(newValue, valueWidth, ostr);
415  ostr << "\n";
416 }
417 
// Body of a member function that prints this difference relative to the
// previously printed one (`prev`) on `stream`.
// NOTE(review): the signature line is missing from this extract.
//
// When the BioSample ID differs from prev's (case-insensitive) the full
// Print(stream) form is used. Otherwise the sample column is left empty and
// the field name is written only if it differs from prev's.
419 {
420  if (!NStr::EqualNocase(m_BiosampleID, prev.m_BiosampleID)) {
421  Print(stream);
422  } else {
423  stream << "\t";
424  if (!NStr::EqualNocase(m_FieldName, prev.m_FieldName)) {
425  stream << m_FieldName;
426  }
// a sample value that is blank or a stop word is printed as empty
427  bool blank_sample = NStr::IsBlank(m_SampleVal) || CBioSource::IsStopWord(m_SampleVal);
428  stream << "\t";
429  stream << m_SequenceID << "\t";
430  stream << (blank_sample ? "" : m_SampleVal) << "\t";
431  stream << m_SrcVal << "\t";
432  stream << endl;
433  }
434 }
435 
436 
// Body of CompareAllButSequenceID (name taken from its call site in this
// file). NOTE(review): the signature line is missing from this extract;
// `other` is the difference being compared against.
//
// Three-way comparison: BioSample ID (case-sensitive), then field name
// (case-insensitive), then source value (case-insensitive). A source value
// of "mixed" on either side is treated as equal to anything.
438 {
439  int cmp = NStr::CompareCase(m_BiosampleID, other.m_BiosampleID);
440  if (cmp == 0) {
441  cmp = NStr::CompareNocase(m_FieldName, other.m_FieldName);
442  if (cmp == 0) {
443  // "mixed" matches to anything
444  if (!NStr::EqualNocase(m_SrcVal, "mixed") && !NStr::EqualNocase(other.m_SrcVal, "mixed")) {
445  cmp = NStr::CompareNocase(m_SrcVal, other.m_SrcVal);
446  // note - if BioSample ID is the same, sample_val should also be the same
447  }
448  }
449  }
450 
451  return cmp;
452 }
453 
454 
// Body of a three-way comparison against `other`.
// NOTE(review): the signature line is missing from this extract, so the
// function name is not visible here.
//
// Orders by CompareAllButSequenceID first and breaks ties with a
// case-sensitive comparison of the sequence IDs.
456 {
457  int cmp = CompareAllButSequenceID(other);
458  if (cmp == 0) {
459  cmp = NStr::CompareCase(m_SequenceID, other.m_SequenceID);
460  }
461 
462  return cmp;
463 }
464 
465 
// Body of s_CompareStructuredCommentFields, the ordering predicate passed to
// sort() elsewhere in this file. NOTE(review): the signature line is missing
// from this extract; `f1` and `f2` are the two column references compared.
//
// A null f1 is ordered before anything; otherwise a null f2 is ordered last;
// otherwise the labels are compared with NStr::Compare.
// NOTE(review): when both f1 and f2 are null this returns true, so the
// predicate is not irreflexive for null entries - confirm that nulls can
// never appear in the sorted list.
467 {
468  if (!f1) {
469  return true;
470  } else if (!f2) {
471  return false;
472  }
473  string name1 = f1->GetLabel();
474  string name2 = f2->GetLabel();
475  int cmp = NStr::Compare (name1, name2);
476  if (cmp < 0) {
477  return true;
478  } else {
479  return false;
480  }
481 }
482 
483 
// Body of a function that builds the sorted, de-duplicated list of
// structured-comment columns available across the user objects in `src`.
// NOTE(review): the signature line, the declaration of `fields`, and the line
// that produces `src_fields` for each object are missing from this extract.
485 {
487 
488  ITERATE(vector<CConstRef<CUser_object> >, it, src) {
490  fields.insert(fields.end(), src_fields.begin(), src_fields.end());
491  }
492 
493  // no need to sort and unique if there are less than two fields
494  if (fields.size() < 2) {
495  return fields;
496  }
497  sort(fields.begin(), fields.end(), s_CompareStructuredCommentFields);
498 
// after sorting, equal labels are adjacent: drop each entry whose label
// repeats that of the entry kept before it
499  TStructuredCommentTableColumnList::iterator f_prev = fields.begin();
500  TStructuredCommentTableColumnList::iterator f_next = f_prev;
501  f_next++;
502  while (f_next != fields.end()) {
503  if (NStr::Equal((*f_prev)->GetLabel(), (*f_next)->GetLabel())) {
504  f_next = fields.erase(f_next);
505  } else {
506  ++f_prev;
507  ++f_next;
508  }
509  }
510 
511  return fields;
512 }
513 
514 
// Compare a sequence's BioSource with the BioSource of a BioSample.
//
// sequence_id   label of the sequence, stored in each reported difference.
// biosample_id  BioSample accession, stored in each reported difference.
// src           BioSource taken from the sequence.
// sample        BioSource taken from the BioSample.
//
// Returns one CBiosampleFieldDiff per entry of src.GetBiosampleDiffs(sample).
// NOTE(review): the declaration of `rval` (listing line 517) is missing from
// this extract.
515 TBiosampleFieldDiffList GetFieldDiffs(const string& sequence_id, const string& biosample_id, const CBioSource& src, const CBioSource& sample)
516 {
518 
519  TFieldDiffList src_diffs = src.GetBiosampleDiffs(sample);
520  ITERATE(TFieldDiffList, it, src_diffs) {
521  CRef<CBiosampleFieldDiff> diff(new CBiosampleFieldDiff(sequence_id, biosample_id, **it));
522  rval.push_back(diff);
523  }
524 
525 
526  return rval;
527 }
528 
529 
530 bool s_ShouldIgnoreStructuredCommentFieldDiff (const string& label, const string& src_val, const string& sample_val)
531 {
532  if (NStr::Equal(label, "StructuredCommentPrefix")
533  || NStr::Equal(label, "StructuredCommentSuffix")) {
534  return true;
535  } else if (NStr::EqualNocase(src_val, sample_val)) {
536  return true;
537  } else {
538  return false;
539  }
540 }
541 
542 
// Compare two structured comments field by field.
//
// sequence_id   label of the sequence, stored in each reported difference.
// biosample_id  BioSample accession, stored in each reported difference.
// src           structured comment taken from the sequence.
// sample        structured comment taken from the BioSample.
//
// Returns one difference per field that is not filtered out by
// s_ShouldIgnoreStructuredCommentFieldDiff.
// NOTE(review): the declarations of `rval` (listing line 545) and
// `field_list` (listing line 553) are missing from this extract.
543 TBiosampleFieldDiffList GetFieldDiffs(const string& sequence_id, const string& biosample_id, const CUser_object& src, const CUser_object& sample)
544 {
546 
547  vector<CConstRef<CUser_object> > src_list;
548  CConstRef<CUser_object> s1(&src);
549  src_list.push_back(s1);
550  CConstRef<CUser_object> s2(&sample);
551  src_list.push_back(s2);
552 
554 
555  ITERATE(TStructuredCommentTableColumnList, it, field_list) {
// prefix/suffix fields are skipped before any value is read; the helper
// called below rejects the same two labels again
556  if (NStr::Equal((*it)->GetLabel(), "StructuredCommentPrefix")
557  || NStr::Equal((*it)->GetLabel(), "StructuredCommentSuffix")) {
558  continue;
559  }
560  string src_val = (*it)->GetFromComment(src);
561  string sample_val = (*it)->GetFromComment(sample);
562 
563  if (!s_ShouldIgnoreStructuredCommentFieldDiff((*it)->GetLabel(), src_val, sample_val)) {
564  CRef<CBiosampleFieldDiff> diff(new CBiosampleFieldDiff(sequence_id, biosample_id, (*it)->GetLabel(), src_val, sample_val));
565  rval.push_back(diff);
566  }
567  }
568 
569  return rval;
570 }
571 
572 
// Compare two optional structured comments field by field.
//
// sequence_id   label of the sequence, stored in each reported difference.
// biosample_id  BioSample accession, stored in each reported difference.
// src           structured comment from the sequence; may be null.
// sample        structured comment from the BioSample; may be null.
//
// A null comment contributes no fields and an empty value for every field.
// NOTE(review): the declarations of `rval` (listing line 575) and
// `field_list` (listing line 585) are missing from this extract.
573 TBiosampleFieldDiffList GetFieldDiffs(const string& sequence_id, const string& biosample_id, CConstRef<CUser_object> src, CConstRef<CUser_object> sample)
574 {
576 
577  vector<CConstRef<CUser_object> > src_list;
578  if (src) {
579  src_list.push_back(src);
580  }
581  if (sample) {
582  src_list.push_back(sample);
583  }
584 
586 
587  ITERATE(TStructuredCommentTableColumnList, it, field_list) {
588  string src_val = "";
589  if (src) {
590  src_val = (*it)->GetFromComment(*src);
591  }
592  string sample_val = "";
593  if (sample) {
// NOTE(review): the value returned here is discarded, so sample_val stays
// empty even when `sample` is set; the src branch above assigns its result.
// Probably meant `sample_val = ...` - confirm before changing.
594  (*it)->GetFromComment(*sample);
595  }
596  if (!s_ShouldIgnoreStructuredCommentFieldDiff((*it)->GetLabel(), src_val, sample_val)) {
597  CRef<CBiosampleFieldDiff> diff(new CBiosampleFieldDiff(sequence_id, biosample_id, (*it)->GetLabel(), src_val, sample_val));
598  rval.push_back(diff);
599  }
600  }
601 
602  return rval;
603 }
604 
605 
// Body of a function that reports whether any difference in `diffs` has a
// non-blank source value, optionally describing each such difference on the
// stream pointed to by `log`.
// NOTE(review): the signature line is missing from this extract, so the
// function name and exact parameter types are not visible here.
//
// Returns false for an empty list, true if at least one difference has a
// non-blank source value.
607 {
608  if (diffs.empty()) {
609  return false;;
610  }
611 
612  bool rval = false;
613  bool printed_header = false;
614 
615  ITERATE (TBiosampleFieldDiffList, it, diffs) {
616  string src_val = (*it)->GetSrcVal();
617  if (!NStr::IsBlank(src_val)) {
618  if (log) {
// the "Conflict found" header is written once, using the first conflicting
// difference's sequence ID and BioSample
619  if (!printed_header) {
620  *log << "Conflict found for " << (*it)->GetSequenceId() << " for " << (*it)->GetBioSample() << endl;
621  printed_header = true;
622  }
623  *log << "\t" << (*it)->GetFieldName() << ": BioSource contains \"" << src_val << "\", BioSample contains \"" << (*it)->GetSampleVal() << "\"" << endl;
624  }
625  rval = true;
626  }
627  }
628  return rval;
629 }
630 
631 
// Decide whether a descriptor is a structured comment that should be
// included in BioSample reporting.
//
// desc             descriptor to test; must be a user object whose type
//                  string is exactly "StructuredComment".
// expected_prefix  when blank, every structured comment is accepted except
//                  those whose prefix starts (case-insensitively) with
//                  "##Genome-Assembly-Data", "##Assembly-Data" or
//                  "##Genome-Annotation-Data"; when non-blank, only comments
//                  whose prefix starts with expected_prefix are accepted.
//
// NOTE(review): the declaration of `prefix` (listing line 646) is missing
// from this extract.
632 bool s_IsReportableStructuredComment(const CSeqdesc& desc, const string& expected_prefix)
633 {
634  if (!desc.IsUser()) {
635  return false;
636  }
637 
638  bool rval = false;
639 
640  const CUser_object& user = desc.GetUser();
641 
642  if (!user.IsSetType() || !user.GetType().IsStr()
643  || !NStr::Equal(user.GetType().GetStr(), "StructuredComment")){
644  rval = false;
645  } else {
647  if (NStr::IsBlank (expected_prefix)) {
648  if (!NStr::StartsWith(prefix, "##Genome-Assembly-Data", NStr::eNocase)
649  && !NStr::StartsWith(prefix, "##Assembly-Data", NStr::eNocase)
650  && !NStr::StartsWith(prefix, "##Genome-Annotation-Data", NStr::eNocase)) {
651  rval = true;
652  }
653  } else if (NStr::StartsWith(prefix, expected_prefix)) {
654  rval = true;
655  }
656  }
657  return rval;
658 }
659 
660 
// Body of a function that returns the first column of `values_table` whose
// header title equals `column_name` (exact, case-sensitive NStr::Equal), or
// the value `empty` when no column matches.
// NOTE(review): the signature line and the declaration of `empty` (listing
// line 669) are missing from this extract.
662 {
663  ITERATE (CSeq_table::TColumns, cit, values_table->GetColumns()) {
664  if ((*cit)->IsSetHeader() && (*cit)->GetHeader().IsSetTitle()
665  && NStr::Equal ((*cit)->GetHeader().GetTitle(), column_name)) {
666  return *cit;
667  }
668  }
670  return empty;
671 }
672 
673 
// Body of AddValueToColumn (name taken from its call sites in this file).
// NOTE(review): the signature line is missing from this extract.
//
// Stores `value` at index `row` of the column's string data, first padding
// the data with empty strings until it holds at least row + 1 entries.
675 {
676  while (column->SetData().SetString().size() < row + 1) {
677  column->SetData().SetString().push_back ("");
678  }
679  column->SetData().SetString()[row] = value;
680 }
681 
682 
683 void AddValueToTable (CSeq_table& table, string column_name, string value, size_t row)
684 {
685  // do we already have a column for this subtype?
686  bool found = false;
687  NON_CONST_ITERATE (CSeq_table::TColumns, cit, table.SetColumns()) {
688  if ((*cit)->IsSetHeader() && (*cit)->GetHeader().IsSetTitle()
689  && NStr::EqualNocase((*cit)->GetHeader().GetTitle(), column_name)) {
690  AddValueToColumn((*cit), value, row);
691  found = true;
692  break;
693  }
694  }
695  if (!found) {
696  CRef<objects::CSeqTable_column> new_col(new objects::CSeqTable_column());
697  new_col->SetHeader().SetTitle(column_name);
698  AddValueToColumn(new_col, value, row);
699  table.SetColumns().push_back(new_col);
700  }
701 }
702 
703 
704 string GetValueFromColumn(const CSeqTable_column& column, size_t row)
705 {
706  string val = "";
707 
708  if (column.IsSetData() && column.GetData().IsString() && column.GetData().GetString().size() > row) {
709  val = column.GetData().GetString()[row];
710  }
711  return val;
712 }
713 
714 
715 string GetValueFromTable(const CSeq_table& table, string column_name, size_t row)
716 {
717  string val = "";
718  ITERATE (CSeq_table::TColumns, cit, table.GetColumns()) {
719  if ((*cit)->IsSetHeader() && (*cit)->GetHeader().IsSetTitle()
720  && NStr::EqualNocase((*cit)->GetHeader().GetTitle(), column_name)) {
721  val = GetValueFromColumn((**cit), row);
722  break;
723  }
724  }
725  return val;
726 }
727 
728 
729 static bool s_IsCitSub (const CSeqdesc& desc)
730 {
731  if (!desc.IsPub() || !desc.GetPub().IsSetPub()) {
732  return false;
733  }
734  ITERATE(CPubdesc::TPub::Tdata, it, desc.GetPub().GetPub().Get()) {
735  if ((*it)->IsSub()) {
736  return true;
737  }
738  }
739 
740  return false;
741 }
742 
743 
// Column titles used when values are written into the table built for a
// bioseq (see the AddValueToTable calls that follow in this file).
744 static const char* kSequenceID = "Sequence ID";
745 static const char* kAffilInst = "Institution";
746 static const char* kAffilDept = "Department";
747 static const char* kBioProject = "BioProject";
748 
749 // This function is for generating a table of biosample values for a bioseq
750 // that does not currently have a biosample ID
//
// NOTE(review): several listing lines are missing from this extract: the
// first signature line (751), the statement after `msg` is built (758), the
// statement inside the BioProject branch (787) and the declaration of
// `comm_fields` (820).
//
// Adds one row to `table` for the bioseq `bh`. The row holds the sequence
// label, the submitter affiliation from the first submission citation that
// has one, the BioSource name/value pairs, optionally the reportable
// structured-comment fields, and (when with_id is set) the first BioSample
// ID. Nothing is added when the bioseq has no source, no reportable comment,
// no BioProject ID and no submission citation.
752  bool include_comments, const string& expected_prefix)
753 {
754  vector<string> biosample_ids = GetBiosampleIDs(bh);
755  if (biosample_ids.size() > 0 && !with_id) {
756  // do not collect if already has biosample ID
757  string msg = GetBestBioseqLabel(bh) + " already has Biosample ID " + biosample_ids[0];
759  }
760  vector<string> bioproject_ids = GetBioProjectIDs(bh);
761 
762  CSeqdesc_CI src_desc_ci(bh, CSeqdesc::e_Source);
763  CSeqdesc_CI comm_desc_ci(bh, CSeqdesc::e_User);
// advance to the first reportable structured comment, if any
764  while (comm_desc_ci && !s_IsReportableStructuredComment(*comm_desc_ci, expected_prefix)) {
765  ++comm_desc_ci;
766  }
767 
768  CSeqdesc_CI pub_desc_ci(bh, CSeqdesc::e_Pub);
// advance to the first publication that contains a submission citation
769  while (pub_desc_ci && !s_IsCitSub(*pub_desc_ci)) {
770  ++pub_desc_ci;
771  }
772 
773  if (!src_desc_ci && !comm_desc_ci && bioproject_ids.size() == 0 && !pub_desc_ci) {
774  return;
775  }
776 
777  string sequence_id = GetBestBioseqLabel(bh);
// the new row is appended after the existing rows
778  size_t row = table.GetNum_rows();
779  AddValueToTable (table, kSequenceID, sequence_id, row);
780 
781  if (bioproject_ids.size() > 0) {
// join all BioProject IDs with ';'
782  string val = bioproject_ids[0];
783  for (size_t i = 1; i < bioproject_ids.size(); i++) {
784  val += ";";
785  val += bioproject_ids[i];
786  }
788  }
789 
790  if (pub_desc_ci) {
791  ITERATE(CPubdesc::TPub::Tdata, it, pub_desc_ci->GetPub().GetPub().Get()) {
792  if ((*it)->IsSub() && (*it)->GetSub().IsSetAuthors() && (*it)->GetSub().GetAuthors().IsSetAffil()) {
793  const CAffil& affil = (*it)->GetSub().GetAuthors().GetAffil();
794  if (affil.IsStd()) {
795  if (affil.GetStd().IsSetAffil()) {
796  AddValueToTable(table, kAffilInst, affil.GetStd().GetAffil(), row);
797  }
798  if (affil.GetStd().IsSetDiv()) {
799  AddValueToTable(table, kAffilDept, affil.GetStd().GetDiv(), row);
800  }
801  } else if (affil.IsStr()) {
802  AddValueToTable(table, kAffilInst, affil.GetStr(), row);
803  }
// only the first submission citation with an affiliation is used
804  break;
805  }
806  }
807  }
808 
809  if (src_desc_ci) {
810  const CBioSource& src = src_desc_ci->GetSource();
811  CBioSource::TNameValList src_vals = src.GetNameValPairs();
812  ITERATE(CBioSource::TNameValList, it, src_vals) {
813  AddValueToTable(table, it->first, it->second, row);
814  }
815  }
816 
817  if (include_comments) {
818  while (comm_desc_ci) {
819  const CUser_object& usr = comm_desc_ci->GetUser();
821  ITERATE(TStructuredCommentTableColumnList, it, comm_fields) {
822  string label = (*it)->GetLabel();
823  AddValueToTable(table, (*it)->GetLabel(), (*it)->GetFromComment(usr), row);
824  }
825  ++comm_desc_ci;
826  while (comm_desc_ci && !s_IsReportableStructuredComment(*comm_desc_ci, expected_prefix)) {
827  ++comm_desc_ci;
828  }
829  }
830  }
831  if (with_id && biosample_ids.size() > 0) {
832  AddValueToTable(table, "BioSample ID", biosample_ids[0], row);
833  }
834  int num_rows = (int)row + 1;
835  table.SetNum_rows(num_rows);
836 }
837 
838 
// Normalize an attribute name in place by removing every space, underscore
// and hyphen, so that names differing only in those separators compare equal.
//
// attribute_name  name to normalize; modified in place.
void HarmonizeAttributeName(string& attribute_name)
{
    // one pass over the string instead of one replace pass per separator
    attribute_name.erase(
        std::remove_if(attribute_name.begin(), attribute_name.end(),
                       [](char c) { return c == ' ' || c == '_' || c == '-'; }),
        attribute_name.end());
}
845 
846 
847 bool AttributeNamesAreEquivalent (string name1, string name2)
848 {
849  HarmonizeAttributeName(name1);
850  HarmonizeAttributeName(name2);
851  return NStr::EqualNocase(name1, name2);
852 }
853 
854 
855 bool ResolveSuppliedBioSampleAccession(const string& biosample_accession, vector<string>& biosample_ids)
856 {
857  if (!NStr::IsBlank(biosample_accession)) {
858  if (biosample_ids.size() == 0) {
859  // use supplied BioSample accession
860  biosample_ids.push_back(biosample_accession);
861  } else {
862  // make sure supplied BioSample accession is listed
863  bool found = false;
864  ITERATE(vector<string>, it, biosample_ids) {
865  if (NStr::EqualNocase(*it, biosample_accession)) {
866  found = true;
867  break;
868  }
869  }
870  if (!found) {
871  return false;
872  }
873  biosample_ids.clear();
874  biosample_ids.push_back(biosample_accession);
875  }
876  }
877  return true;
878 }
879 
880 
// Body of GetBestBioseqLabel (name taken from its call sites in this file).
// NOTE(review): the signature line and the declaration of `id` (listing line
// 885) are missing from this extract; `bsh` is the bioseq handle.
//
// Chooses a Seq-id for the bioseq - a GenBank id if one is present, otherwise
// the best choice by CSeq_id::BestRank - and returns its label. Returns an
// empty string when no id is chosen.
882 {
883  string label = "";
884 
886  vector<CRef <CSeq_id> > id_list;
887  ITERATE(CBioseq_Handle::TId, it, bsh.GetId()) {
888  CConstRef<CSeq_id> ir = (*it).GetSeqId();
// if several GenBank ids exist, the last one seen is kept
889  if (ir->IsGenbank()) {
890  id = ir;
891  }
// constness is cast away only so the id can be stored in the CRef list
// handed to FindBestChoice
892  CRef<CSeq_id> ic(const_cast<CSeq_id *>(ir.GetPointer()));
893  id_list.push_back(ic);
894  }
895  if (!id) {
896  id = FindBestChoice(id_list, CSeq_id::BestRank);
897  }
898  if (id) {
899  id->GetLabel(&label);
900  }
901 
902  return label;
903 }
904 
905 
// Tail of the definition that compares a bioseq (`bh`) against each of its
// BioSamples and returns the accumulated difference list `diffs`.
// NOTE(review): several listing lines are missing from this extract: the
// first signature lines (906-907), the declaration of `diffs` (916), the
// declaration of `prefix` (925), the statement after `msg` is built (943)
// and one argument of the GetFieldDiffs call in the "not found" branch (984).
//
// biosample_accession          optional accession supplied by the caller.
// num_processed                incremented once per BioSample whose data was
//                              fetched.
// unprocessed_ids              receives each BioSample ID whose data could
//                              not be fetched.
// use_dev_server, cache        forwarded to GetBiosampleData.
// compare_structured_comments  when false, structured comments are skipped.
// expected_prefix              forwarded to s_IsReportableStructuredComment.
908  const string& biosample_accession,
909  size_t& num_processed,
910  vector<string>& unprocessed_ids,
911  bool use_dev_server,
912  bool compare_structured_comments,
913  const string& expected_prefix,
914  TBioSamples *cache)
915 {
917 
918  CSeqdesc_CI src_desc_ci(bh, CSeqdesc::e_Source);
919  CSeqdesc_CI comm_desc_ci(bh, CSeqdesc::e_User);
// user_labels and user_objs are parallel vectors: entry i of each describes
// the same reportable structured comment of the bioseq
920  vector<string> user_labels;
921  vector<CConstRef<CUser_object> > user_objs;
922  while (comm_desc_ci) {
923  if (s_IsReportableStructuredComment(*comm_desc_ci, expected_prefix)) {
924  const CUser_object& user = comm_desc_ci->GetUser();
926  CConstRef<CUser_object> obj(&user);
927  user_labels.push_back(prefix);
928  user_objs.push_back(obj);
929  }
930  ++comm_desc_ci;
931  }
932 
933  // disabled because we want to consider *all* bioseqs, not just the ones with weird user objects:
934  //if (!src_desc_ci && user_labels.size() == 0) {
935  // return diffs;
936  //}
937 
938  vector<string> biosample_ids = GetBiosampleIDs(bh);
939 
940  if (!ResolveSuppliedBioSampleAccession(biosample_accession, biosample_ids)) {
941  // error
// biosample_ids[0] is safe here: the resolver returns false only when the
// list is non-empty
942  string msg = GetBestBioseqLabel(bh) + " has conflicting BioSample Accession " + biosample_ids[0];
944  }
945 
946  if (biosample_ids.size() == 0) {
947  // for report mode, do not report if no biosample ID
948  return diffs;
949  }
950 
951  string sequence_id = GetBestBioseqLabel(bh);
952 
953  ITERATE(vector<string>, id, biosample_ids) {
954  CRef<CSeq_descr> descr = GetBiosampleData(*id, use_dev_server, cache);
955  if (descr) {
956  ITERATE(CSeq_descr::Tdata, it, descr->Get()) {
957  if ((*it)->IsSource()) {
958  if (src_desc_ci) {
959  TBiosampleFieldDiffList these_diffs = GetFieldDiffs(sequence_id,
960  *id,
961  src_desc_ci->GetSource(),
962  (*it)->GetSource());
963  diffs.insert(diffs.end(), these_diffs.begin(), these_diffs.end());
964  }
965  } else if ((*it)->IsUser() && s_IsReportableStructuredComment(**it, expected_prefix)) {
966  if (compare_structured_comments) {
967  CConstRef<CUser_object> sample(&(*it)->GetUser());
968  string this_prefix = CComment_rule::GetStructuredCommentPrefix((*it)->GetUser());
969  bool found = false;
970  vector<string>::iterator sit = user_labels.begin();
971  vector<CConstRef<CUser_object> >::iterator uit = user_objs.begin();
// compare against every bioseq comment whose label matches this prefix
972  while (sit != user_labels.end() && uit != user_objs.end()) {
973  if (NStr::EqualNocase(*sit, this_prefix)) {
974  TBiosampleFieldDiffList these_diffs = GetFieldDiffs(sequence_id, *id, *uit, sample);
975  diffs.insert(diffs.end(), these_diffs.begin(), these_diffs.end());
976  found = true;
977  }
978  ++sit;
979  ++uit;
980  }
981  if (!found) {
982  TBiosampleFieldDiffList these_diffs = GetFieldDiffs(sequence_id,
983  *id,
985  sample);
986  diffs.insert(diffs.end(), these_diffs.begin(), these_diffs.end());
987  }
988  }
989  }
990  }
991  num_processed++;
992  } else {
993  unprocessed_ids.push_back(*id);
994  }
995  }
996  return diffs;
997 }
998 
999 
1000 // section for creating XML
1001 
1002 void AddContact(node::iterator& organization, CConstRef<CAuth_list> auth_list)
1003 {
1004  string email = "";
1005  string street = "";
1006  string city = "";
1007  string sub = "";
1008  string country = "";
1009  string first = "";
1010  string last = "";
1011  bool add_address = false;
1012 
1013  CConstRef<CAffil> affil(NULL);
1014  if (auth_list && auth_list->IsSetAffil()) {
1015  affil = &(auth_list->GetAffil());
1016  }
1017 
1018  if (affil && affil->IsStd()) {
1019  const CAffil::TStd& std = affil->GetStd();
1020  string email = "";
1021  if (std.IsSetEmail()) {
1022  email = std.GetEmail();
1023  }
1024  if (std.IsSetStreet() && !NStr::IsBlank(std.GetStreet())
1025  && std.IsSetCity() && !NStr::IsBlank(std.GetCity())
1026  && std.IsSetSub() && !NStr::IsBlank(std.GetSub())
1027  && std.IsSetCountry() && !NStr::IsBlank(std.GetCountry())) {
1028  street = std.GetStreet();
1029  city = std.GetCity();
1030  sub = std.GetSub();
1031  country = std.GetCountry();
1032  add_address = true;
1033  }
1034  }
1035 
1036  if (auth_list && auth_list->IsSetNames() && auth_list->GetNames().IsStd()
1037  && auth_list->GetNames().GetStd().size()
1038  && auth_list->GetNames().GetStd().front()->IsSetName()
1039  && auth_list->GetNames().GetStd().front()->GetName().IsName()) {
1040  const CName_std& nstd = auth_list->GetNames().GetStd().front()->GetName().GetName();
1041  string first = "";
1042  string last = "";
1043  if (nstd.IsSetFirst()) {
1044  first = nstd.GetFirst();
1045  }
1046  if (nstd.IsSetLast()) {
1047  last = nstd.GetLast();
1048  }
1049  }
1050 
1051  if (NStr::IsBlank(email) || NStr::IsBlank(first) || NStr::IsBlank(last)) {
1052  // just don't add contact if no email address or name
1053  return;
1054  }
1055  node::iterator contact = organization->insert(node("Contact"));
1056  contact->get_attributes().insert("email", email.c_str());
1057  if (add_address) {
1058  node::iterator address = contact->insert(node("Address"));
1059  address->insert(node("Street", street.c_str()));
1060  address->insert(node("City", city.c_str()));
1061  address->insert(node("Sub", sub.c_str()));
1062  address->insert(node("Country", country.c_str()));
1063  }
1064 
1065  node::iterator name = contact->insert(node("Name"));
1066  name->insert(node("First", first.c_str()));
1067  name->insert(node("Last", last.c_str()));
1068 }
1069 
1070 
1071 void s_AddSamplePair(node& sample_attrs, string attribute_name, string val)
1072 {
1073  sample_attrs.insert(node("Attribute", val.c_str()))
1074  ->get_attributes().insert("attribute_name", attribute_name.c_str());
1075 
1076 }
1077 
// Copy the contents of a BioSource into the XML being built for a BioSample.
//
// organism      node that receives an <OrganismName> child when the source
//               has a taxname.
// sample_attrs  node that receives one <Attribute> child per reported
//               subsource and organism modifier.
// src           BioSource to export.
//
// Subsources of subtype "other" are reported as "subsrc_note" and organism
// modifiers of subtype "other" as "orgmod_note"; all others use their
// subtype name. Pairs for which CBioSource::ShouldIgnoreConflict returns
// true are skipped.
// NOTE(review): the subsource loop header (listing line 1081) and the
// condition guarding `val = "true"` (listing line 1092) are missing from
// this extract.
1078 void AddBioSourceToAttributes(node& organism, node& sample_attrs, const CBioSource& src)
1079 {
1080  if (src.IsSetSubtype()) {
1082  if ((*it)->IsSetSubtype() && (*it)->IsSetName()) {
1083  CSubSource::TSubtype st = (*it)->GetSubtype();
1084  string attribute_name = "";
1085  if (st == CSubSource::eSubtype_other) {
1086  attribute_name = "subsrc_note";
1087  } else {
1088  attribute_name = CSubSource::GetSubtypeName((*it)->GetSubtype());
1089  }
1090 
1091  string val = (*it)->GetName();
1093  val = "true";
1094  }
1095  if (!CBioSource::ShouldIgnoreConflict(attribute_name, val, "")) {
1096  s_AddSamplePair(sample_attrs, attribute_name, val);
1097  }
1098  }
1099  }
1100  }
1101 
1102  if (src.IsSetOrg()) {
1103  if (src.GetOrg().IsSetTaxname()) {
1104  organism.insert(node("OrganismName", src.GetOrg().GetTaxname().c_str()));
1105  }
1106  if (src.GetOrg().IsSetOrgMod()) {
1107  ITERATE(COrgName::TMod, it, src.GetOrg().GetOrgname().GetMod()) {
1108  if ((*it)->IsSetSubtype() && (*it)->IsSetSubname()) {
1109  string attribute_name = "";
1110  if ((*it)->GetSubtype() == COrgMod::eSubtype_other) {
1111  attribute_name = "orgmod_note";
1112  } else {
1113  attribute_name = COrgMod::GetSubtypeName((*it)->GetSubtype());
1114  }
1115  if (!CBioSource::ShouldIgnoreConflict(attribute_name, (*it)->GetSubname(), "")) {
1116  s_AddSamplePair(sample_attrs, attribute_name, (*it)->GetSubname());
1117  }
1118  }
1119  }
1120  }
1121  }
1122 }
1123 
1124 
// Labels of the two structured-comment fields that delimit a comment.
1125 static const char* kStructuredCommentPrefix = "StructuredCommentPrefix";
1126 static const char* kStructuredCommentSuffix = "StructuredCommentSuffix";
1127 
// Add the fields of a structured comment to the sample attribute list,
// skipping any field for which an <Attribute> with a matching name and
// identical content is already present.
//
// sample_attrs  node holding the <Attribute> children; new ones are appended.
// usr           structured comment whose fields are exported.
//
// NOTE(review): the declaration of `comm_fields` (listing line 1130), the
// condition guarding the `continue` (listing lines 1133-1134) and the second
// half of the attribute-name test (listing line 1147) are missing from this
// extract.
1128 void AddStructuredCommentToAttributes(node& sample_attrs, const CUser_object& usr)
1129 {
1131  ITERATE(TStructuredCommentTableColumnList, it, comm_fields) {
1132  string label = (*it)->GetLabel();
1135  continue;
1136  }
1137  string val = (*it)->GetFromComment(usr);
1138 
// look for an existing <Attribute> child that already carries this value
1139  node::iterator a = sample_attrs.begin();
1140  bool found = false;
1141  while (a != sample_attrs.end() && !found) {
1142  if (NStr::Equal(a->get_name(), "Attribute")) {
1143  attributes::const_iterator at = a->get_attributes().begin();
1144  bool name_match = false;
1145  while (at != a->get_attributes().end() && !name_match) {
1146  if (NStr::Equal(at->get_name(), "attribute_name")
1148  name_match = true;
1149  }
1150  ++at;
1151  }
1152  if (name_match) {
1153  if (NStr::Equal(a->get_content(), val)) {
1154  found = true;
1155  }
1156  }
1157  }
1158  ++a;
1159  }
1160  if (!found) {
1161  sample_attrs.insert(node("Attribute", val.c_str()))
1162  ->get_attributes().insert("attribute_name", label.c_str());
1163  }
1164  }
1165 }
1166 
1167 
1168 string OwnerFromAffil(const CAffil& affil)
1169 {
1170  list<string> sbm_info;
1171  if (affil.IsStd()) {
1172  if (affil.GetStd().IsSetAffil()) {
1173  sbm_info.push_back(affil.GetStd().GetAffil());
1174  }
1175  if (affil.GetStd().IsSetDiv()
1176  && (!affil.GetStd().IsSetAffil()
1177  || !NStr::EqualNocase(affil.GetStd().GetDiv(), affil.GetStd().GetAffil()))) {
1178  sbm_info.push_back(affil.GetStd().GetDiv());
1179  }
1180  } else if (affil.IsStr()) {
1181  sbm_info.push_back(affil.GetStr());
1182  }
1183 
1184  return NStr::Join(sbm_info, ", ");
1185 }
1186 
1187 
1188 // This function is for generating a table of biosample values for a bioseq
1189 // that does not currently have a biosample ID
1191  const string& id_prefix,
1192  CNcbiOstream* report_stream,
1193  const string& bioproject_accession,
1194  const string& default_owner,
1195  const string& hup_date,
1196  const string& comment,
1197  bool first_seq_only,
1198  bool report_structured_comments,
1199  const string& expected_prefix)
1200 {
1201  vector<string> biosample_ids = GetBiosampleIDs(bh);
1202  if (biosample_ids.size() > 0) {
1203  // do not collect if already has biosample ID
1204  string msg = GetBestBioseqLabel(bh) + " already has BioSample ID " + biosample_ids[0];
1205  NCBI_THROW(CException, eUnknown, msg );
1206  }
1207  vector<string> bioproject_ids = biosample_util::GetBioProjectIDs(bh);
1208  if (bioproject_ids.size() > 0) {
1209  if (!NStr::IsBlank(bioproject_accession)) {
1210  bool found = false;
1211  ITERATE(vector<string>, it, bioproject_ids) {
1212  if (NStr::EqualNocase(*it, bioproject_accession)) {
1213  found = true;
1214  break;
1215  }
1216  }
1217  if (!found) {
1218  // error
1219  string msg = GetBestBioseqLabel(bh) +
1220  " has conflicting BioProject ID " + bioproject_ids[0];
1221  NCBI_THROW(CException, eUnknown, msg );
1222  }
1223  bioproject_ids.clear();
1224  bioproject_ids.push_back(bioproject_accession);
1225  }
1226  } else if (!NStr::IsBlank(bioproject_accession)) {
1227  bioproject_ids.push_back(bioproject_accession);
1228  }
1229 
1230  CSeqdesc_CI src_desc_ci(bh, CSeqdesc::e_Source);
1231  CSeqdesc_CI comm_desc_ci(bh, CSeqdesc::e_User);
1232  while (comm_desc_ci && !s_IsReportableStructuredComment(*comm_desc_ci, expected_prefix)) {
1233  ++comm_desc_ci;
1234  }
1235 
1236  CConstRef<CAuth_list> auth_list(NULL);
1237  CSeqdesc_CI pub_desc_ci(bh, CSeqdesc::e_Pub);
1238  while (pub_desc_ci){
1239  if (pub_desc_ci->GetPub().IsSetPub()) {
1240  ITERATE(CPubdesc::TPub::Tdata, it, pub_desc_ci->GetPub().GetPub().Get()) {
1241  if ((*it)->IsSub() && (*it)->GetSub().IsSetAuthors()) {
1242  auth_list = &((*it)->GetSub().GetAuthors());
1243  break;
1244  } else if ((*it)->IsGen() && (*it)->GetGen().IsSetAuthors()) {
1245  auth_list = &((*it)->GetGen().GetAuthors());
1246  }
1247  }
1248  }
1249  ++pub_desc_ci;
1250  }
1251 
1252  if (!src_desc_ci && !comm_desc_ci && bioproject_ids.size() == 0) {
1253  string msg = GetBestBioseqLabel(bh) + " has no BioSample information";
1254  NCBI_THROW(CException, eUnknown, msg );
1255  }
1256 
1257  string sequence_id = GetBestBioseqLabel(bh);
1258  CTime tNow(CTime::eCurrent);
1259  document doc("Submission");
1260  doc.set_encoding("UTF-8");
1261  doc.set_is_standalone(true);
1262 
1263  node & root = doc.get_root_node();
1264 
1265  node::iterator description = root.insert(node("Description"));
1266  string title = "Auto generated from GenBank Accession " + sequence_id;
1267  description->insert(node("Comment", title.c_str()));
1268 
1269  node::iterator node_iter = description->insert(node("Submitter"));
1270 
1271  CConstRef<CAffil> affil(NULL);
1272  if (auth_list && auth_list->IsSetAffil()) {
1273  affil = &(auth_list->GetAffil());
1274  }
1275 
1276  // Contact info
1277  node::iterator organization = description->insert(node("Organization"));
1278  {
1279  attributes & attrs = organization->get_attributes();
1280  attrs.insert("role", "owner");
1281  attrs.insert("type", "institute");
1282  }
1283  // same info for sample structure
1284  node owner("Owner");
1285 
1286  string owner_str = "";
1287  if (affil) {
1288  owner_str = OwnerFromAffil(*affil);
1289  }
1290  if (NStr::IsBlank(owner_str)) {
1291  owner_str = default_owner;
1292  }
1293 
1294  organization->insert(node("Name", owner_str.c_str()));
1295  owner.insert(node("Name", owner_str.c_str()));
1296 
1297  AddContact(organization, auth_list);
1298 
1299  if (!NStr::IsBlank(hup_date)) {
1300  node hup("Hold");
1301  hup.get_attributes().insert("release_date", hup_date.c_str());
1302  description->insert(hup);
1303  }
1304 
1305  node::iterator add_data = root.insert(node("Action"))->insert(node("AddData"));
1306  add_data->get_attributes().insert("target_db", "BioSample");
1307 
1308  // BioSample-specific xml
1309  node::iterator data = add_data->insert(node("Data"));
1310  data->get_attributes().insert("content_type", "XML");
1311 
1312  node::iterator sample = data->insert(node("XmlContent"))->insert(node("BioSample"));
1313  sample->get_attributes().insert("schema_version", "2.0");
1314 
1315  node::iterator ids = sample->insert(node("SampleId"));
1316  string sample_id = sequence_id;
1317  if (!NStr::IsBlank(id_prefix)) {
1318  if (first_seq_only) {
1319  sample_id = id_prefix;
1320  } else {
1321  sample_id = id_prefix + ":" + sequence_id;
1322  }
1323  }
1324  node_iter = ids->insert(node("SPUID", sample_id.c_str()));
1325  node_iter->get_attributes().insert( "spuid_namespace", "GenBank");
1326 
1327  node::iterator descr = sample->insert(node("Descriptor"));
1328 
1329  if (!NStr::IsBlank(comment)) {
1330  descr->insert(node("Description", comment.c_str()));
1331  }
1332 
1333 
1334  node::iterator organism = sample->insert(node("Organism"));
1335 
1336  // add unique bioproject links from series
1337  if (bioproject_ids.size() > 0) {
1338  node links("BioProject");
1339  ITERATE(vector<string>, it, bioproject_ids) {
1340  if (! it->empty()) {
1341  node_iter = links.insert(node("PrimaryId", it->c_str()));
1342  node_iter->get_attributes().insert("db", "BioProject");
1343  }
1344  }
1345  sample->insert(links);
1346  }
1347 
1348  sample->insert(node("Package", "Generic.1.0"));
1349 
1350  node sample_attrs("Attributes");
1351  if (src_desc_ci) {
1352  const CBioSource& src = src_desc_ci->GetSource();
1353  AddBioSourceToAttributes(*organism, sample_attrs, src);
1354  }
1355 
1356  if (report_structured_comments) {
1357  while (comm_desc_ci) {
1358  const CUser_object& usr = comm_desc_ci->GetUser();
1359  AddStructuredCommentToAttributes(sample_attrs, usr);
1360  ++comm_desc_ci;
1361  while (comm_desc_ci && !s_IsReportableStructuredComment(*comm_desc_ci, expected_prefix)) {
1362  ++comm_desc_ci;
1363  }
1364  }
1365  }
1366  sample->insert(sample_attrs);
1367  node::iterator identifier = add_data->insert(node("Identifier"));
1368  node_iter = identifier->insert(node("SPUID", sample_id.c_str()));
1369  node_iter->get_attributes().insert( "spuid_namespace", "GenBank");
1370 
1371 
1372  // write XML to file
1373  if (report_stream) {
1374  *report_stream << doc << endl;
1375  } else {
1376  string path = sequence_id;
1377  NStr::ReplaceInPlace(path, "|", "_");
1378  NStr::ReplaceInPlace(path, ".", "_");
1379  NStr::ReplaceInPlace(path, ":", "_");
1380 
1381  path = path + ".xml";
1382  CNcbiOstream* xml_out = new CNcbiOfstream(path.c_str());
1383  if (!xml_out)
1384  {
1385  NCBI_THROW(CException, eUnknown, "Unable to open " + path);
1386  }
1387  *xml_out << doc << endl;
1388  }
1389 }
1390 
1391 // rw-905, rw-1010 >>
1392 // ----------------------------------------------------------------------------
1393 void
1395  const CSeq_descr& bioSample,
1396  const CBioSource& bioSource,
1397  TBiosampleFieldDiffList& diffs)
1398 // ----------------------------------------------------------------------------
1399 {
1400  diffs.clear();
1401  for (auto pSampleDesc: bioSample.Get()) {
1402  const CSeqdesc& sampleDesc = *pSampleDesc;
1403  if (!sampleDesc.IsSource()) {
1404  continue;
1405  }
1406  TBiosampleFieldDiffList these_diffs = GetFieldDiffs(
1407  "biosample", "descriptors", bioSource, sampleDesc.GetSource());
1408  diffs.insert(diffs.end(), these_diffs.begin(), these_diffs.end());
1409  return;
1410  }
1411 }
1412 
1413 //#define DUMP_DATA
1414 // ----------------------------------------------------------------------------
1415 void
1417  const string& filename,
1418  const CSerialObject& object)
1419 // -----------------------------------------------------------------------------
1420 {
1421 #ifdef DUMP_DATA
1422  CNcbiOfstream ostr(filename);
1423  MSerial_Format_AsnText asnText;
1424  ostr << asnText << object;
1425  ostr.close();
1426 #endif
1427 }
1428 
1429 // ----------------------------------------------------------------------------
// NOTE(review): listing line 1431, which carries this function's name, is
// absent from this view.
//
// Fetches the descriptors of the BioSample `existingBiosampleAcc` through
// biosample_util::GetBiosampleData and compares `newBioSource` (labelled
// "submitted") with the first source descriptor found there (labelled
// "curated").
//
// existingBiosampleAcc - accession passed to GetBiosampleData.
// newBioSource         - source being compared against the BioSample.
// proposedNewBiosource - receives a copy of the BioSample's source when
//                        differences were found; untouched otherwise.
// diffs                - receives the differences returned by GetFieldDiffs.
//
// Returns true when `diffs` is not empty on exit.
//
// NOTE(review): `diffs` is only assigned when a source descriptor exists, so
// with none present the result reflects whatever the caller passed in --
// confirm that this is intended.
1430 bool
1432  const string& existingBiosampleAcc,
1433  const CBioSource& newBioSource,
1434  CBioSource& proposedNewBiosource,
1435  TBiosampleFieldDiffList& diffs)
1436 // ----------------------------------------------------------------------------
1437 {
1438  SaveSerialObject("submittedSource.asn1", newBioSource);
1439  CRef<CSeq_descr> pExistingBiosampleDescrs = biosample_util::GetBiosampleData(
1440  existingBiosampleAcc, false, nullptr);
// NOTE(review): the reference is dereferenced without a null check -- confirm
// that GetBiosampleData cannot return an empty reference here.
1441  SaveSerialObject("curatedDescriptors.asn1", *pExistingBiosampleDescrs);
1442  bool assigned = false;
1443  for (auto pExistingDesc: pExistingBiosampleDescrs->Get()) {
1444  CSeqdesc& existingDesc = *pExistingDesc;
1445  if (!existingDesc.IsSource()) {
1446  continue;
1447  }
1448  const CBioSource& existingSource = existingDesc.GetSource();
1449  SaveSerialObject("curatedSource.asn1", existingSource);
1450  diffs = GetFieldDiffs(
1451  "submitted", "curated", newBioSource, existingSource);
1452  if (!diffs.empty()) {
// The proposal starts out as a plain copy of the BioSample's source; the
// diff-driven refinement on the commented-out line below is disabled.
1453  if (!assigned) {
1454  proposedNewBiosource.Assign(existingSource);
1455  assigned = true;
1456  }
1457  //UpdateBiosourceFromBiosample(diffs, existingSource, proposedNewBiosource);
1458  SaveSerialObject("proposedSource.asn1", proposedNewBiosource);
1459  }
// Only the first source descriptor is examined.
1460  break;
1461  }
1462  return !diffs.empty();
1463 }
1464 
1465 // ----------------------------------------------------------------------------
1466 bool
1468  const CBioSource& existingBiosource,
1469  CBioSource& newBiosource)
1470 // ----------------------------------------------------------------------------
1471 {
1472  if (existingBiosource.IsSetOrg()) {
1473  const auto& existingOrg = existingBiosource.GetOrg();
1474  if (existingOrg.IsSetTaxname()) {
1475  if (!newBiosource.IsSetTaxname()) {
1476  newBiosource.SetOrg().SetTaxname(existingOrg.GetTaxname());
1477  }
1478  }
1479  auto existingTaxId = existingOrg.GetTaxId();
1480  if (existingTaxId > ZERO_TAX_ID) {
1481  if (!newBiosource.IsSetOrg() || newBiosource.GetOrg().GetTaxId() == ZERO_TAX_ID) {
1482  newBiosource.SetOrg().SetTaxId(existingTaxId);
1483  }
1484  }
1485  }
1486  if (existingBiosource.IsSetOrg() && existingBiosource.GetOrg().IsSetOrgname()) {
1487  const auto& existingOrgs = existingBiosource.GetOrg().GetOrgname().GetMod();
1488  auto& newOrgs = newBiosource.SetOrg().SetOrgname().SetMod();
1489  for (const auto& pExistingMod: existingOrgs) {
1490  auto existingSubtype = pExistingMod->GetSubtype();
1491  auto existingSubname = pExistingMod->GetSubname();
1492  bool alreadyThere = false;
1493  for (const auto& pNewMod: newOrgs) {
1494  auto newSubtype = pNewMod->GetSubtype();
1495  if ( newSubtype == existingSubtype) {
1496  alreadyThere = true;
1497  break;
1498  }
1499  }
1500  if (alreadyThere) {
1501  continue;
1502  }
1503  CRef<COrgMod> pNewMod(new COrgMod(existingSubtype, existingSubname));
1504  newOrgs.push_back(pNewMod);
1505  //auto existingAttrib = pExistingMod->GetAttrib();
1506  }
1507  }
1508  if (existingBiosource.IsSetSubtype()) {
1509  const auto& existingSubtypes = existingBiosource.GetSubtype();
1510  for (const auto& pExistingSubSource: existingSubtypes) {
1511  if (!pExistingSubSource->CanGetName()) {
1512  continue;
1513  }
1514  auto existingSubtype = pExistingSubSource->GetSubtype();
1515  //const auto& existingSubkey = CSubSource::GetSubtypeName(existingSubtype);
1516  //const auto& existingSubvalue = pExistingSubSource->GetName();
1517 
1518  if (newBiosource.HasSubtype(existingSubtype)) {
1519  continue;
1520  }
1521  CRef<CSubSource> pNewSubsource(new CSubSource);
1522  pNewSubsource->Assign(*pExistingSubSource);
1523  newBiosource.SetSubtype().push_back(pNewSubsource);
1524  }
1525  }
1526  return true;
1527 }
1528 
1529 // ----------------------------------------------------------------------------
1532  const TBiosampleFieldDiffList& diffs,
1533  const string& fieldName)
1534 // ----------------------------------------------------------------------------
1535 {
1536  for (const auto pDiff: diffs) {
1537  if (pDiff->GetFieldName() == fieldName) {
1538  return pDiff;
1539  }
1540  }
1542 }
1543 
1544 // ----------------------------------------------------------------------------
1545 bool
1547  const TBiosampleFieldDiffList& diffs,
1548  const CBioSource& existingBiosource,
1549  CBioSource& newBiosource)
1550 // ----------------------------------------------------------------------------
1551 {
1552  if (existingBiosource.IsSetOrg()) {
1553  const auto& existingOrg = existingBiosource.GetOrg();
1554  {{
1555  auto fromDiff = sGetDiffByFieldName(diffs, "Organism Name");
1556  if (fromDiff && !fromDiff->GetSrcVal().empty()) {
1557  newBiosource.SetOrg().SetTaxname(existingOrg.GetTaxname());
1558  }
1559  }}
1560  {{
1561  auto fromDiff = sGetDiffByFieldName(diffs, "Tax ID");
1562  auto existingTaxId = existingOrg.GetTaxId();
1563  if (fromDiff && existingTaxId > ZERO_TAX_ID) {
1564  newBiosource.SetOrg().SetTaxId(existingTaxId);
1565  }
1566  }}
1567  }
1568 
1569  if (existingBiosource.IsSetOrg() && existingBiosource.GetOrg().IsSetOrgname()) {
1570  const auto& existingOrgs = existingBiosource.GetOrg().GetOrgname().GetMod();
1571  auto& newOrgs = newBiosource.SetOrg().SetOrgname().SetMod();
1572  for (const auto& pExistingMod: existingOrgs) {
1573  auto existingSubtype = pExistingMod->GetSubtype();
1574  auto existingSubkey = COrgMod::GetSubtypeName(existingSubtype);
1575  auto fromDiff = sGetDiffByFieldName(diffs, existingSubkey);
1576  if (fromDiff && !fromDiff->GetSrcVal().empty()) {
1577  CRef<COrgMod> pNewMod(new COrgMod(existingSubtype, fromDiff->GetSrcVal()));
1578  newOrgs.push_back(pNewMod);
1579  }
1580  }
1581  }
1582 
1583  if (existingBiosource.IsSetSubtype()) {
1584  const auto& existingSubtypes = existingBiosource.GetSubtype();
1585  auto& newSubtypes = newBiosource.SetSubtype();
1586  for (const auto& pExistingSubSource: existingSubtypes) {
1587  if (!pExistingSubSource->CanGetName()) {
1588  continue;
1589  }
1590  auto existingSubtype = pExistingSubSource->GetSubtype();
1591  const auto& existingSubkey = CSubSource::GetSubtypeName(existingSubtype);
1592  auto pFromDiff = sGetDiffByFieldName(diffs, existingSubkey);
1593  if (pFromDiff && !pFromDiff->GetSrcVal().empty()) {
1594  CRef<CSubSource> pNewSubsource(
1595  new CSubSource(existingSubtype, pFromDiff->GetSrcVal()));
1596  newSubtypes.push_back(pNewSubsource);
1597  }
1598  }
1599  }
1600  return true;
1601 }
1602 
1603 // ----------------------------------------------------------------------------
1605  const TBiosampleFieldDiffList& diffList,
1606  CNcbiOstream& ostr,
1607  size_t keyWidth,
1608  size_t valueWidth)
1609 // ----------------------------------------------------------------------------
1610 {
1611  sPrintField("attribute", keyWidth, ostr);
1612  sPrintField("old_value", valueWidth, ostr);
1613  sPrintField("new_value", valueWidth, ostr);
1614  ostr << "\n";
1615 
1616  ostr << string(keyWidth + 2 * valueWidth, '-') << "\n";
1617 
1618  for (auto diff: diffList) {
1619  diff->PrettyPrint(ostr, keyWidth, valueWidth);
1620  }
1621  ostr << "\n";
1622 }
1623 
1624 
1625 
1626 // << rw-905, rw-1010
1627 
1628 END_SCOPE(biosample_util)
vector< CRef< CFieldDiff > > TFieldDiffList
Definition: BioSource.hpp:53
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool s_ShouldIgnoreStructuredCommentFieldDiff(const string &label, const string &src_val, const string &sample_val)
bool UpdateBiosourceFromBiosample(const CBioSource &existingBiosource, CBioSource &newBiosource)
void AddBioseqToTable(CBioseq_Handle bh, CSeq_table &table, bool with_id, bool include_comments, const string &expected_prefix)
string OwnerFromAffil(const CAffil &affil)
static const char * kAffilInst
string GetValueFromColumn(const CSeqTable_column &column, size_t row)
void AddBioSourceToAttributes(node &organism, node &sample_attrs, const CBioSource &src)
void HarmonizeAttributeName(string &attribute_name)
vector< string > GetDBLinkIDs(const CUser_object &user, const string &field_name)
bool s_IsReportableStructuredComment(const CSeqdesc &desc, const string &expected_prefix)
void s_AddSamplePair(node &sample_attrs, string attribute_name, string val)
bool ResolveSuppliedBioSampleAccession(const string &biosample_accession, vector< string > &biosample_ids)
EStatus GetBioSampleStatusFromNode(const node &item)
static const char * kSequenceID
static const char * kStructuredCommentPrefix
static const char * kStructuredCommentSuffix
vector< string > GetBioProjectIDs(CBioseq_Handle bh)
CRef< CSeqTable_column > FindSeqTableColumnByName(CRef< CSeq_table > values_table, string column_name)
void AddValueToTable(CSeq_table &table, string column_name, string value, size_t row)
static const char * kAffilDept
string PrepareUrl(bool use_dev_server, const string &args)
bool DoDiffsContainConflicts(const TBiosampleFieldDiffList &diffs, CNcbiOstream *log)
string GetBestBioseqLabel(CBioseq_Handle bsh)
static const char * kBioProject
TStatus ProcessBiosampleStatusNode(node &item)
CRef< CSeq_descr > GetBiosampleData(const string &accession, bool use_dev_server, TBioSamples *cache)
bool s_CompareStructuredCommentFields(CRef< CStructuredCommentTableColumnBase > f1, CRef< CStructuredCommentTableColumnBase > f2)
CConstRef< CBiosampleFieldDiff > sGetDiffByFieldName(const TBiosampleFieldDiffList &diffs, const string &fieldName)
void PrettyPrint(const TBiosampleFieldDiffList &diffList, CNcbiOstream &ostr, size_t keyWidth, size_t valueWidth)
TStructuredCommentTableColumnList GetAvailableFields(vector< CConstRef< CUser_object > > src)
static bool s_IsCitSub(const CSeqdesc &desc)
void AddStructuredCommentToAttributes(node &sample_attrs, const CUser_object &usr)
TBiosampleFieldDiffList GetBioseqDiffs(CBioseq_Handle bh, const string &biosample_accession, size_t &num_processed, vector< string > &unprocessed_ids, bool use_dev_server, bool compare_structured_comments, const string &expected_prefix, TBioSamples *cache)
void PrintBioseqXML(CBioseq_Handle bh, const string &id_prefix, CNcbiOstream *report_stream, const string &bioproject_accession, const string &default_owner, const string &hup_date, const string &comment, bool first_seq_only, bool report_structured_comments, const string &expected_prefix)
string GetBiosampleStatusName(EStatus status)
EStatus GetBiosampleStatus(const string &accession, bool use_dev_server, TStatuses *cache)
void sPrintField(const string &value, size_t width, CNcbiOstream &ostr)
void GenerateDiffListFromBioSource(const CSeq_descr &bioSample, const CBioSource &bioSource, TBiosampleFieldDiffList &diffs)
void AddContact(node::iterator &organization, CConstRef< CAuth_list > auth_list)
bool AttributeNamesAreEquivalent(string name1, string name2)
string GetValueFromTable(const CSeq_table &table, string column_name, size_t row)
void SaveSerialObject(const string &filename, const CSerialObject &object)
void AddValueToColumn(CRef< CSeqTable_column > column, string value, size_t row)
void ProcessBulkBioSample(TStatuses &status, string list, bool use_dev_server)
vector< string > GetBiosampleIDs(CBioseq_Handle bh)
TBiosampleFieldDiffList GetFieldDiffs(const string &sequence_id, const string &biosample_id, const CBioSource &src, const CBioSource &sample)
vector< CRef< CBiosampleFieldDiff > > TBiosampleFieldDiffList
map< string, CRef< CSeq_descr > >::iterator TBioSamplesIterator
EStatus
@ eStatus_ToBeCurated
@ eStatus_Hup
@ eStatus_Replaced
@ eStatus_Suppressed
@ eStatus_Live
@ eStatus_Unknown
@ eStatus_Withdrawn
pair< string, biosample_util::EStatus > TStatus
map< string, EStatus >::iterator TStatusesIterator
std representation
Definition: Affil_.hpp:91
@Affil.hpp User-defined methods of the data storage class.
Definition: Affil.hpp:56
static bool ShouldIgnoreConflict(const string &label, string src_val, string sample_val, bool is_local_copy=false)
Definition: BioSource.cpp:1033
TNameValList GetNameValPairs() const
Definition: BioSource.cpp:811
static bool IsStopWord(const string &value)
Definition: BioSource.cpp:1382
bool HasSubtype(CSubSource::TSubtype subtype) const
Definition: BioSource.cpp:2040
vector< TNameVal > TNameValList
Definition: BioSource.hpp:127
bool IsSetTaxname(void) const
Definition: BioSource.cpp:335
TFieldDiffList GetBiosampleDiffs(const CBioSource &biosample, bool is_local_copy=false) const
Definition: BioSource.cpp:1327
int Compare(const CBiosampleFieldDiff &other)
static void PrintHeader(ncbi::CNcbiOstream &stream, bool show_seq_id=true)
void PrettyPrint(ncbi::CNcbiOstream &stream, size_t keyWidth=20, size_t valueWidth=40) const
void Print(ncbi::CNcbiOstream &stream, bool show_seq_id=true) const
int CompareAllButSequenceID(const CBiosampleFieldDiff &other)
CBioseq_Handle –.
static string GetStructuredCommentPrefix(const CUser_object &user, bool normalize=true)
This stream exchanges data with an HTTP server located at the URL: http[s]://host[:port]/path[?...
@Name_std.hpp User-defined methods of the data storage class.
Definition: Name_std.hpp:56
CObjectIStreamAsn –.
Definition: objistrasn.hpp:54
@OrgMod.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
static string GetSubtypeName(TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:108
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
bool IsSetOrgMod(void) const
Definition: Org_ref.cpp:169
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Base class for all serializable objects.
Definition: serialbase.hpp:150
static bool NCBI_UseGeoLocNameForCountry(void)
Definition: SubSource.cpp:92
static string GetSubtypeName(CSubSource::TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: SubSource.cpp:183
static bool NeedsNoText(const TSubtype &subtype)
Definition: SubSource.cpp:231
CTime –.
Definition: ncbitime.hpp:296
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71
container_type::iterator iterator
Definition: map.hpp:54
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
const char * get_value(void) const
Get the value of this attribute.
Definition: ait_impl.cpp:350
const char * get_name(void) const
Get the name of this attribute.
Definition: ait_impl.cpp:335
Const Iterator class for accessing attribute pairs.
Definition: attributes.hpp:320
Iterator class for accessing attribute pairs.
Definition: attributes.hpp:285
The xml::attributes class is used to access all the attributes of one xml::node.
Definition: attributes.hpp:78
iterator begin(void)
Get an iterator that points to the first attribute.
Definition: attributes.cpp:160
iterator end(void)
Get an iterator that points one past the last attribute.
Definition: attributes.cpp:174
void insert(const char *name, const char *value, const ns *nspace=NULL)
Add an attribute to the attributes list.
Definition: attributes.cpp:188
The xml::document class is used to hold the XML tree and various bits of information about it.
Definition: document.hpp:80
void set_encoding(const char *encoding)
Set the XML encoding string.
Definition: document.cpp:577
const node & get_root_node(void) const
Get a reference to the root node of this document.
Definition: document.cpp:539
void set_is_standalone(bool sa)
Set the standalone flag.
Definition: document.cpp:594
The xml::error_messages class is used to store all the error message which are collected while parsin...
Definition: errors.hpp:137
The xml::node::iterator provides a way to access children nodes similar to a standard C++ container.
Definition: node.hpp:704
The xml::node class is used to hold information about one XML node.
Definition: node.hpp:106
const char * get_name(void) const
Get the name of this xml::node.
Definition: node.cpp:769
iterator end(void)
Get an iterator that points one past the last child for this node.
Definition: node.hpp:835
iterator begin(void)
Get an iterator that points to the beginning of this node's children.
Definition: node.cpp:1217
iterator insert(const node &n)
Insert a new child node.
Definition: node.cpp:1463
xml::attributes & get_attributes(void)
Get the list of attributes.
Definition: node.cpp:831
void Print(const CCompactSAMApplication::AlignInfo &ai)
char value[7]
Definition: config.c:431
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
@ eUnknown
Definition: app_popup.hpp:72
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:742
vector< CSeq_id_Handle > TId
const TId & GetId(void) const
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:1684
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define kEmptyStr
Definition: ncbistr.hpp:123
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
Definition: ncbistr.hpp:5297
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3401
static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive compare of a substring with another string.
Definition: ncbistr.cpp:135
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
@ eCurrent
Use current time. See also CCurrentTime.
Definition: ncbitime.hpp:300
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
static const char label[]
bool IsSetAffil(void) const
author affiliation Check if a value has been assigned to Affil data member.
Definition: Auth_list_.hpp:498
const TAffil & GetAffil(void) const
Get the Affil member data.
Definition: Auth_list_.hpp:510
bool IsSetStreet(void) const
street address, not ANSI Check if a value has been assigned to Street data member.
Definition: Affil_.hpp:923
const TStr & GetStr(void) const
Get the variant data.
Definition: Affil_.hpp:1193
bool IsSetCity(void) const
Author Affiliation, City Check if a value has been assigned to City data member.
Definition: Affil_.hpp:782
const TEmail & GetEmail(void) const
Get the Email member data.
Definition: Affil_.hpp:982
bool IsSetEmail(void) const
Check if a value has been assigned to Email data member.
Definition: Affil_.hpp:970
const TStreet & GetStreet(void) const
Get the Street member data.
Definition: Affil_.hpp:935
const TAffil & GetAffil(void) const
Get the Affil member data.
Definition: Affil_.hpp:700
const TSub & GetSub(void) const
Get the Sub member data.
Definition: Affil_.hpp:841
bool IsStr(void) const
Check if variant Str is selected.
Definition: Affil_.hpp:1187
bool IsSetNames(void) const
Check if a value has been assigned to Names data member.
Definition: Auth_list_.hpp:464
bool IsSetDiv(void) const
Author Affiliation, Division Check if a value has been assigned to Div data member.
Definition: Affil_.hpp:735
const TCountry & GetCountry(void) const
Get the Country member data.
Definition: Affil_.hpp:888
const TStd & GetStd(void) const
Get the variant data.
Definition: Affil_.cpp:214
const TNames & GetNames(void) const
Get the Names member data.
Definition: Auth_list_.hpp:478
const TStd & GetStd(void) const
Get the variant data.
Definition: Auth_list_.hpp:410
bool IsStd(void) const
Check if variant Std is selected.
Definition: Affil_.hpp:1207
bool IsSetCountry(void) const
Author Affiliation, Country Check if a value has been assigned to Country data member.
Definition: Affil_.hpp:876
const TCity & GetCity(void) const
Get the City member data.
Definition: Affil_.hpp:794
bool IsSetAffil(void) const
Author Affiliation, Name. Check if a value has been assigned to Affil data member.
Definition: Affil_.hpp:688
const TDiv & GetDiv(void) const
Get the Div member data.
Definition: Affil_.hpp:747
bool IsSetSub(void) const
Author Affiliation, County Sub. Check if a value has been assigned to Sub data member.
Definition: Affil_.hpp:829
bool IsStd(void) const
Check if variant Std is selected.
Definition: Auth_list_.hpp:404
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: BioSource_.hpp:527
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
void SetOrg(TOrg &value)
Assign a value to Org data member.
Definition: BioSource_.cpp:108
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
Definition: BioSource_.hpp:545
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetType(void) const
Type of object within class. Check if a value has been assigned to Type data member.
bool IsStrs(void) const
Check if variant Strs is selected.
const TStrs & GetStrs(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetLast(void) const
Check if a value has been assigned to Last data member.
Definition: Name_std_.hpp:410
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TType & GetType(void) const
Get the Type member data.
const TFirst & GetFirst(void) const
Get the First member data.
Definition: Name_std_.hpp:469
const TLast & GetLast(void) const
Get the Last member data.
Definition: Name_std_.hpp:422
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
vector< CStringUTF8 > TStrs
bool IsSetFirst(void) const
Check if a value has been assigned to First data member.
Definition: Name_std_.hpp:457
const TMod & GetMod(void) const
Get the Mod member data.
Definition: OrgName_.hpp:839
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:372
list< CRef< COrgMod > > TMod
Definition: OrgName_.hpp:332
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
Definition: Org_ref_.hpp:529
bool IsSetTaxname(void) const
Preferred formal name. Check if a value has been assigned to Taxname data member.
Definition: Org_ref_.hpp:360
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:541
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
Definition: OrgMod_.hpp:125
list< CRef< CPub > > Tdata
Definition: Pub_equiv_.hpp:90
const Tdata & Get(void) const
Get the member data.
Definition: Pub_equiv_.hpp:165
const TColumns & GetColumns(void) const
Get the Columns member data.
Definition: Seq_table_.hpp:433
vector< CRef< CSeqTable_column > > TColumns
Definition: Seq_table_.hpp:92
bool IsGenbank(void) const
Check if variant Genbank is selected.
Definition: Seq_id_.hpp:841
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const TUser & GetUser(void) const
Get the variant data.
Definition: Seqdesc_.cpp:384
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
const TPub & GetPub(void) const
Get the variant data.
Definition: Seqdesc_.cpp:356
bool IsSource(void) const
Check if variant Source is selected.
Definition: Seqdesc_.hpp:1190
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
bool IsPub(void) const
Check if variant Pub is selected.
Definition: Seqdesc_.hpp:1096
bool IsSetPub(void) const
The citation(s). Check if a value has been assigned to Pub data member.
Definition: Pubdesc_.hpp:593
const TPub & GetPub(void) const
Get the Pub member data.
Definition: Pubdesc_.hpp:605
bool IsUser(void) const
Check if variant User is selected.
Definition: Seqdesc_.hpp:1122
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Pub
a reference to the publication
Definition: Seqdesc_.hpp:122
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
int i
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
XML library namespace.
Definition: attributes.hpp:57
unsigned int a
Definition: ncbi_localip.c:102
The Object manager core.
static const char * suffix[]
Definition: pcregrep.c:408
static const char * prefix[]
Definition: pcregrep.c:405
static const char * column
Definition: stats.c:23
vector< CRef< CStructuredCommentTableColumnBase > > TStructuredCommentTableColumnList
TStructuredCommentTableColumnList GetStructuredCommentFields(const CUser_object &src)
Modified on Thu Mar 28 17:05:38 2024 by modify_doxy.py rev. 669887