52 #include <objtools/readers/source_mod_parser.hpp>
105 if (create_date_desc.IsNull())
108 create_date_desc.Set().SetCreate_date(*date);
118 static CTimeFormat in_formats[2] = {
"M-D-Y",
"D-M-Y" };
121 if (!
source.IsSetSubtype())
126 for (
auto subtype :
source.SetSubtype())
130 string& col_date = subtype->SetName();
133 col_date =
CTime(col_date, in_formats[p]).
AsString(out_format);
142 size_t p =
context.m_cleanup.find_first_of(
"Dd");
143 if (p == string::npos)
149 for (
auto feature : annot.
SetData().SetFtable())
151 if (feature->IsSetData() && feature->GetData().IsBiosrc())
152 x_CorrectCollectionDates(
context, feature->SetData().SetBiosrc());
159 size_t p =
context.m_cleanup.find_first_of(
"Dd");
160 if (p == string::npos)
163 if (seq_or_set.IsSetDescr())
170 if (seq_or_set.IsSetAnnot())
172 for (
auto annot : seq_or_set.SetAnnot())
173 x_CorrectCollectionDates(
context, *annot);
221 throw std::runtime_error(
"output is not open");
227 throw std::runtime_error(
"output is not open");
269 auto& filename =
f.GetFilename();
270 if (!filename.empty())
274 auto& filename =
f.GetFilename();
275 if (!filename.empty())
282 static constexpr std::array<string_view, 8> default_suffixes = {
293 string_view ext = default_suffixes[
static_cast<int>(kind)];
307 if (
basename == string_view(
"-") || dir ==
"/dev") {
325 for (
auto& desc: descr.
Set())
327 if (desc->IsUser() && desc->GetUser().IsSetType() &&
328 desc->GetUser().GetType().IsStr() &&
329 desc->GetUser().GetType().GetStr() ==
type)
331 return desc->SetUser();
344 descr.
Set().push_back(user_desc);
364 bioseq.
SetId().push_back(accession);
372 submit->
SetSub().SetHup(
true);
374 submit->
SetSub().SetReldate(*reldate);
379 submit->
SetSub().SetTool(toolname);
395 submit->
SetData().SetEntrys().clear();
396 submit->
SetData().SetEntrys().push_back(
object);
418 object->SetDescr().Set().push_back(pub_desc);
439 for (
auto src_desc: src.
Get())
442 switch (src_desc->Which())
469 switch (src_desc->Which())
476 edit::CDBLink::MergeDBLink(user_obj, src_desc->GetUser());
487 if (new_desc.
Empty())
490 dest.Set().push_back(new_desc);
492 new_desc->
Assign(*src_desc);
494 if (dest.Set().empty())
546 bioseq->
SetId().clear();
547 bioseq->
SetId().push_back(
id);
557 for (
CBioseq_CI bioseq_it(h_entry); bioseq_it; ++bioseq_it)
561 for (
auto id_it: bioseq_it->GetBioseqCore()->GetId())
563 if (!id_it->IsGeneral())
continue;
565 const string& dbtag = id_it->GetGeneral().GetDb();
575 if (!comment.empty())
577 id_it->GetLabel(&comment);
592 std::vector<CSeq_feat*> cds;
593 std::vector<CSeq_feat*> rnas;
594 for (
CFeat_CI feat_it(h_entry); feat_it; ++feat_it)
596 if (!feat_it->IsSetData())
599 switch (feat_it->GetData().Which())
605 cds.push_back((
CSeq_feat*) &feat_it->GetOriginalFeature());
608 rnas.push_back((
CSeq_feat*) &feat_it->GetOriginalFeature());
633 if (seq_id.Empty()) continue;
635 const CObject_id* obj_id;
636 switch (seq_id->Which())
638 case CSeq_id::e_Local:
639 obj_id = &seq_id->GetLocal();
648 seq_id->SetGeneral().SetTag().SetId(obj_id->GetId());
651 string id = obj_id->GetStr();
652 seq_id->SetGeneral().SetTag().SetStr(id);
655 seq_id->SetGeneral().SetDb(db);
667 for (CSeq_feat::TQual::iterator it = quals.begin(); it != quals.end(); it++)
672 const string& qual_name = qual.
GetQual();
676 if (qual_name ==
"transcript_id") {
677 qual.
SetQual(
"orig_transcript_id");
680 if (qual_name ==
"protein_id") {
681 qual.
SetQual(
"orig_protein_id");
696 for (CSeq_feat::TQual::iterator it = quals.begin(); it != quals.end();)
698 if ((**it).GetQual() ==
"protein_id" ||
699 (**it).GetQual() ==
"transcript_id")
701 it = quals.erase(it);
714 bool need_update =
false;
715 switch(entry.
Which())
718 need_update |= x_ApplyCreateDate(entry);
762 for (
auto& it: descr.
Set())
774 return Ref(&it->SetOrg());
789 if (
source.IsSetTaxname())
791 name =
source.GetTaxname();
794 if (
source.IsSetOrgname())
796 if (
source.GetOrgname().GetFlatName(name))
799 if (
source.IsSetOrg() &&
source.GetOrg().IsSetOrgname())
801 if (
source.GetOrg().GetOrgname().GetFlatName(name))
807 if (it->GetOrg().IsSetOrgname())
809 if (it->GetOrg().GetOrgname().GetFlatName(name))
839 feature.
SetId().SetLocal().SetId(
id++);
846 size_t p =
m_cleanup.find_first_of(
"Dd");
847 if (p == string::npos)
854 x_CorrectCollectionDates(*
this, bioseq_set);
859 x_CorrectCollectionDates(*
this, bioseq);
880 bioseq_set.
SetDescr().Set().push_back(comment_desc);
888 bioseq.SetDescr().Set().push_back(comment_desc);
897 replace_if(begin(linkage_evidence), end(linkage_evidence),
898 [](
char c) {
return (
isspace(c) || c ==
'_'); },
'-');
901 unique(begin(linkage_evidence), end(linkage_evidence),
902 [](
char a,
char b) {
return (
a ==
b &&
b ==
'-');});
904 linkage_evidence.erase(it, linkage_evidence.end());
910 const string& message,
930 const string& evidenceString,
931 const string& filename,
932 const size_t& lineNum,
936 list<string> evidenceList;
939 for (
string evidence : evidenceList) {
940 string unnormalized_evidence = evidence;
944 evidenceSet.
insert(enum_val);
947 stringstream msgStream;
948 msgStream <<
"On line " << lineNum <<
" of " << filename <<
". ";
949 msgStream <<
"Unrecognized linkage-evidence value: " << unnormalized_evidence <<
".";
963 auto pLEStream = make_unique<CNcbiIfstream>(linkageEvidenceFilename, ios::binary);
965 if (!pLEStream || !pLEStream->is_open()) {
966 s_PostError(pEC,
"Failed to open " + linkageEvidenceFilename);
970 size_t lineNumber = 0;
971 while (pLEStream->good() && !pLEStream->eof()) {
974 getline(*pLEStream, line);
980 string countStr, evidenceStr;
985 stringstream msgStream;
986 msgStream <<
"On line " << lineNumber <<
" of " << linkageEvidenceFilename <<
". ";
987 msgStream << countStr <<
" is not a valid gap size.";
994 if (!evidenceSet.empty()) {
995 gapsizeToEvidence.emplace(count, move(evidenceSet));
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CSeqdesc & Set(bool skip_lookup=false)
static CRef< CSeqdesc > LocateDesc(const CSeq_descr &descr, CSeqdesc::E_Choice which)
static bool EraseDesc(CSeq_descr &descr, CSeqdesc::E_Choice which)
@Gb_qual.hpp User-defined methods of the data storage class.
static CLineErrorEx * Create(EProblem eProblem, EDiagSev eSeverity, int code, int subcode, const std::string &strSeqId, unsigned int uLine, const std::string &strErrorMessage=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())
Use this because the constructor is protected.
static CNcbiApplication * Instance(void)
Singleton method.
bool IsFtable(void) const
@Seq_descr.hpp User-defined methods of the data storage class.
void SetDescr(CSeq_descr &value)
CSeq_entry * GetParentEntry(void) const
namespace ncbi::objects::
string m_genome_center_id
void ApplyUpdateDate(objects::CSeq_entry &entry) const
CDiagnosticFileSet mDiagnosticWriters
static bool GetOrgName(string &name, const objects::CSeq_entry &entry)
CDataFileSet::fileset_type mCurrentDataOutputs
void SetSeqId(objects::CSeq_entry &entry) const
void ApplyFileTracks(objects::CSeq_entry &entry) const
void SetOutputFilename(eFiles kind, const string &filename)
void ApplyAccession(objects::CSeq_entry &entry) const
CRef< objects::CSeq_id > m_accession
CDataFileSet mDataWriters
static objects::CUser_object & SetUserObject(objects::CSeq_descr &descr, const CTempString &type)
void SetOutputFile(eFiles kind, ostream &ostr)
static CRef< objects::COrg_ref > GetOrgRef(objects::CSeq_descr &descr)
CRef< CSerialObject > CreateSubmitFromTemplate(CRef< objects::CSeq_entry > &object, CRef< objects::CSeq_submit > &submit) const
CRef< objects::CSeq_entry > m_entry_template
static void AddUserTrack(objects::CSeq_descr &SD, const string &type, const string &label, const string &data)
void CorrectCollectionDates(objects::CSeq_entry &entry) const
CDiagnosticFileSet::fileset_type mCurrentDiagnosticOutputs
void SmartFeatureAnnotation(objects::CSeq_entry &entry) const
CRef< objects::CSeq_submit > m_submit_template
string GenerateOutputFilename(eFiles kind, string_view basename=kEmptyStr) const
void MergeWithTemplate(objects::CSeq_entry &entry) const
void OpenDiagnosticOutputs()
void RenameProteinIdsQuals(objects::CSeq_feat &feature) const
void CloseDiagnosticOutputs()
void ApplyComments(objects::CSeq_entry &entry) const
void UpdateSubmitObject(CRef< objects::CSeq_submit > &submit) const
CRef< CSerialObject > CreateSeqEntryFromTemplate(CRef< objects::CSeq_entry > object) const
std::ostream & GetOstream(eFiles suffix)
bool ApplyCreateUpdateDates(objects::CSeq_entry &entry) const
string m_ResultsDirectory
void RemoveProteinIdsQuals(objects::CSeq_feat &feature) const
static void MergeSeqDescr(objects::CSeq_entry &dest, const objects::CSeq_descr &src, bool only_set)
static bool IsDBLink(const objects::CSeqdesc &desc)
static void UpdateTaxonFromTable(objects::CBioseq &bioseq)
void CopyFeatureIdsToComments(objects::CSeq_entry &entry) const
void MakeGenomeCenterId(objects::CSeq_entry &entry) const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
virtual bool PutError(const ILineError &)=0
Store error in the container, and return true if error was stored fine, and return false if the calle...
@ eProblem_GeneralParsingError
void SetFilename(enum_type _enum, const std::string &filename)
void SetUseMT(bool use_mt)
fileset_type MakeNewFileset()
void Open(enum_type _enum, const std::string &filename)
iterator_bool insert(const value_type &val)
Include a standard set of the NCBI C++ Toolkit most basic headers.
unsigned int TSeqPos
Type for sequence locations and lengths.
CVersionInfo GetVersion(void) const
Get the program version information.
@ eDiag_Error
Error message.
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
static void SplitPath(const string &path, string *dir=0, string *base=0, string *ext=0)
Split a path string into its basic components.
@ fIgnoreMissing
Ignore missed entries.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define ENUM_METHOD_NAME(EnumName)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static TNumeric StringToNumeric(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to a numeric value.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
size_type find_first_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character in the matching string within the current string,...
static string & ToLower(string &str)
Convert string to lower case – string& version.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
bool IsEmpty(void) const
Is time object empty (date and time)?
static bool ValidateString(const string &str, const CTimeFormat &fmt=kEmptyStr)
Validate if string match time format.
@ eCurrent
Use current time. See also CCurrentTime.
virtual string Print(void) const
Print version information.
@ eSubtype_collection_date
DD-MMM-YYYY format.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
void SetNum(TNum value)
Assign a value to Num data member.
const TStr & GetStr(void) const
Get the variant data.
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
const TType & GetType(void) const
Get the Type member data.
TSub & SetSub(void)
Select the variant.
void SetQual(const TQual &value)
Assign a value to Qual data member.
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void SetId(TId &value)
Assign a value to Id data member.
bool CanGetVal(void) const
Check if it is safe to call GetVal method.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
vector< CRef< CGb_qual > > TQual
const TQual & GetQual(void) const
Get the Qual member data.
TQual & SetQual(void)
Assign a value to Qual data member.
void ResetQual(void)
Reset Qual data member.
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
TSet & SetSet(void)
Select the variant.
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetDescr(void) const
Check if a value has been assigned to Descr data member.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSet(void) const
Check if variant Set is selected.
void ResetDescr(void)
Reset Descr data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_nuc_prot
nuc acid and coded proteins
@ eClass_genbank
converted genbank
void SetData(TData &value)
Assign a value to Data data member.
TId & SetId(void)
Assign a value to Id data member.
const TUser & GetUser(void) const
Get the variant data.
void ResetDescr(void)
Reset Descr data member.
void SetPub(TPub &value)
Assign a value to Pub data member.
TPub & SetPub(void)
Select the variant.
const Tdata & Get(void) const
Get the member data.
TComment & SetComment(void)
Select the variant.
void Select(E_Choice index, EResetVariant reset=eDoResetVariant)
Select the requested variant if needed.
TSource & SetSource(void)
Select the variant.
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TUser & SetUser(void)
Select the variant.
Tdata & Set(void)
Assign a value to data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
TUpdate_date & SetUpdate_date(void)
Select the variant.
bool IsUser(void) const
Check if variant User is selected.
@ e_User
user defined object
@ e_Update_date
date of last update
@ e_Pub
a reference to the publication
@ e_Molinfo
info on the molecule and techniques
@ e_Create_date
date entry first created/released
@ e_Source
source of materials, includes Org-ref
void SetSub(TSub &value)
Assign a value to Sub data member.
void SetData(TData &value)
Assign a value to Data data member.
for(len=0;yy_str[len];++len)
if(yy_accept[yy_current_state])
void VisitAllSetandSeq(objects::CSeq_entry &entry, _Mset mset, _Mseq mseq)
void VisitAllBioseqs(objects::CSeq_entry &entry, _M &&m)
const CharType(& source)[N]
double f(double x_, const double &y_)
static const char * suffix[]
bool AssignLocalIdIfEmpty(CSeq_feat &feature, int &id)
static void s_NormalizeLinkageEvidenceString(string &linkage_evidence)
static CGapsEditor::TEvidenceSet s_ProcessEvidenceString(const string &evidenceString, const string &filename, const size_t &lineNum, ILineErrorListener *pEC)
static void s_PostError(ILineErrorListener *pEC, const string &message, size_t lineNum=0)
void g_LoadLinkageEvidence(const string &linkageEvidenceFilename, CGapsEditor::TCountToEvidenceMap &gapsizeToEvidence, ILineErrorListener *pEC)
static CS_CONTEXT * context