53 static string names[] = {
80 for (
n = 0;
n < sz;
n++) {
89 for (
size_t i = sz;
i >
n;
i--) {
97 auto&
all = node[
"all"];
98 for (
auto& qual_it: node.
GetMap() ) {
99 if (qual_it.first ==
"all")
continue;
102 auto& all_qual =
all[qual_it.first];
103 for (
auto& val_it: qual_it.second->GetMap())
106 for (
auto obj: val_it.second->GetObjects())
108 if (all_qual.Exist(*obj))
109 all_qual[
"*"].Add(*obj,
false);
111 all_qual.Add(*obj,
false);
120 if (node[
"all"][qual].Exist(obj)) {
121 node[
"all"][qual][
"*"].
Add(obj,
false);
124 node[
"all"][qual].
Add(obj,
false);
137 void* m_UserData =
nullptr;
142 "SOURCE_QUALS_ASNDISC",
149 m_Objs[
"all"].Add(*disc_obj);
153 if (biosrc->CanGetOrg()) {
154 const COrg_ref& org_ref = biosrc->GetOrg();
162 if (biosrc->CanGetSubtype()) {
163 for (
auto& it : biosrc->GetSubtype()) {
165 if (it->CanGetName()) {
171 if (biosrc->IsSetOrgMod()) {
172 for (
auto& it : biosrc->GetOrgname().GetMod()) {
180 if (biosrc->CanGetPcr_primers()) {
181 for (
auto& it : biosrc->GetPcr_primers().Get()) {
182 if (it->CanGetForward()) {
183 for (
auto& pr : it->GetForward().Get()) {
184 if (pr->CanGetName()) {
187 if (pr->CanGetSeq()) {
192 if (it->CanGetReverse()) {
193 for (
auto& pr : it->GetReverse().Get()) {
194 if (pr->CanGetName()) {
197 if (pr->CanGetSeq()) {
213 m_Objs[
"all"].Add(*disc_obj);
218 if (obj.CanGetOrg()) {
219 const COrg_ref& org_ref = obj.GetOrg();
229 if (obj.CanGetSubtype()) {
232 if ((*it)->CanGetName()) {
239 if (obj.IsSetOrgMod()) {
253 if (obj.CanGetPcr_primers()) {
255 if ((*it)->CanGetForward()) {
257 if ((*pr)->CanGetName()) {
258 AddObjToQualMap(
"fwd-primer-name", (*pr)->GetName(), *disc_obj, m_Objs);
260 if ((*pr)->CanGetSeq()) {
265 if ((*it)->CanGetReverse()) {
267 if ((*pr)->CanGetName()) {
268 AddObjToQualMap(
"rev-primer-name", (*pr)->GetName(), *disc_obj, m_Objs);
270 if ((*pr)->CanGetSeq()) {
298 bool unique = objs.size() == 1;
300 subtype =
"[n] source[s] [has] unique value[s] for " + qual;
303 subtype =
"[n] source[s] [has] " + qual +
" = " +
val;
313 report[subtype][
"1 source has " + qual +
" = " +
val].
Add(obj);
316 report[subtype].
Add(obj);
323 for (
auto& objs : all_objs) {
325 bool unique =
GetSubtypeStr(qual, objs.first, objs.second->GetObjects(), subtype);
326 for (
auto& obj : objs.second->GetObjects()) {
327 AddObjectToReport(subtype, qual, objs.first, unique, obj.GetNCObject(), report[diagnosis]);
335 for (
auto& objs : all_objs) {
337 bool unique =
GetSubtypeStr(qual, objs.first, objs.second, subtype);
338 for (
auto& obj : objs.second) {
339 AddObjectToReport(subtype, qual, objs.first, unique, obj.GetNCObject(), report[diagnosis]);
348 for (
auto& child : root.
GetMap()) {
357 static const size_t CEILING_VALUE = 1000000000;
367 float g_SesameStreetCutoff = 0.75;
368 return g_SesameStreetCutoff;
380 size_t total =
all.size();
383 all_missing[it] = it;
386 for (
auto it: the_map) {
387 if (it.first ==
"all") {
390 string qual = it.first;
394 size_t pres = m_Objs[
"all"][qual].GetObjects().size();
395 size_t mul = m_Objs[
"all"][qual][
"*"].GetObjects().size();
403 uniq += obj.size() == 1 ? 1 : 0;
404 string upper = jj.first;
408 capital[upper][jj.first].push_back(o);
411 string itfirst = it.first;
412 if (itfirst ==
"country" && use_geo_loc_name) {
413 itfirst =
"geo_loc_name";
417 diagnosis += pres == total ?
"all present" :
"some missing";
419 diagnosis +=
uniq == num ?
"all unique" : bins == 1 ?
"all same" :
"some duplicates";
420 diagnosis += mul ?
", some multi)" :
")";
423 if ((num != total || bins != 1)
424 && (itfirst ==
"collection-date" || itfirst ==
"country" || itfirst ==
"isolation-source" || itfirst ==
"strain" || itfirst ==
"isolate"
425 || itfirst ==
"taxname" || itfirst ==
"breed" || itfirst ==
"cultivar" || itfirst ==
"sex")) {
426 final_report[diagnosis].
Fatal();
429 if ((bins > capital.
size() || (num < total && capital.
size() == 1))
430 && (it.first ==
"country" || it.first ==
"collection-date" || it.first ==
"isolation-source")) {
432 if (bins > capital.
size()) {
433 for (
auto cap: capital) {
435 if (objs.
size() < 2) {
439 size_t best_count = 0;
441 fix->m_Qualifier = itfirst;
442 fix->m_User = m_private.m_UserData;
444 fix->m_Choice.push_back(x.first);
445 if (best_count < x.second.size()) {
446 best_count = x.second.size();
447 fix->m_Value = x.first;
451 for (
auto o: x.second) {
465 fix->m_Qualifier = itfirst;
466 fix->m_Value = sub.
begin()->first;
467 fix->m_User = m_private.m_UserData;
469 for (
auto o: missing) {
474 for (
auto o: missing) {
476 report[diagnosis][
"[n] source[s] [has] missing " + itfirst].
Add(*
r);
483 for (
auto o: missing) {
485 report[diagnosis][
"[n] source[s] [has] missing " + it.first].
Add(*
r);
489 static const size_t MAX_NUM_STR_LEN = 20;
490 for (
auto item: report[diagnosis].GetMap()) {
494 string leading_zeros(MAX_NUM_STR_LEN - sort_order_str.size(),
'0');
495 string subitem =
"[*" + leading_zeros + sort_order_str +
"*]" + item.first;
497 final_report[diagnosis];
498 if (item.second->GetCount()) {
499 final_report[diagnosis][subitem].
SetCount(item.second->GetCount());
500 final_report[diagnosis].
Incr();
504 final_report[diagnosis][subitem] = *item.second;
515 if (it->GetSubtype() == st) {
532 if (it->GetSubtype() == st) {
552 string val = fix->m_Value;
556 if (qual ==
"host") {
560 else if (qual ==
"strain") {
564 else if (qual ==
"country") {
568 else if (qual ==
"isolation-source") {
572 else if (qual ==
"collection-date") {
592 for (
auto& it : list) {
593 if (it->CanAutofix()) {
610 if (qual ==
"host") {
614 else if (qual ==
"strain") {
618 else if (qual ==
"country") {
622 else if (qual ==
"isolation-source") {
626 else if (qual ==
"collection-date") {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
const COrgName & GetOrgname(void) const
CConstRef< CObject > GetMoreInfo()
@OrgMod.hpp User-defined methods of the data storage class.
virtual vector< CRef< CReportItem > > GetSubitems() const =0
static void Add(TReportObjectList &list, TReportObjectSet &hash, CReportObj &obj, bool unique=true)
TReportObjectList & GetObjects()
CRef< CReportItem > Export(CDiscrepancyCore &test, bool unique=true) const
vector< string > m_Choice
CSourseQualsAutofixData()
static bool NCBI_UseGeoLocNameForCountry(void)
const_iterator begin() const
vector< CRef< CReportObj > > TReportObjectList
#define DISCREPANCY_AUTOFIX(name)
#define DISCREPANCY_CASE1(name, type, group, descr,...)
#define DISCREPANCY_SUMMARIZE(name)
static const struct name_t names[]
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
TObjectType * GetNCPointer(void) const THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
list< CRef< CSubSource > > TSubtype
void SetOrg(TOrg &value)
Assign a value to Org data member.
void SetName(const TName &value)
Assign a value to Name data member.
const TName & GetName(void) const
Get the Name member data.
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
@ eSubtype_collection_date
DD-MMM-YYYY format.
@ eSubtype_isolation_source
const TMod & GetMod(void) const
Get the Mod member data.
const TSubname & GetSubname(void) const
Get the Subname member data.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool CanGetTaxname(void) const
Check if it is safe to call GetTaxname method.
void SetSubname(const TSubname &value)
Assign a value to Subname data member.
@ eSubtype_gb_acronym
used by taxonomy database
@ eSubtype_gb_synonym
used by taxonomy database
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
@ eSubtype_nat_host
natural host of this specimen
@ eSubtype_gb_anamorph
used by taxonomy database
TSource & SetSource(void)
Select the variant.
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static void AddObjectToReport(const string &subtype, const string &qual, const string &val, bool unique, CReportObj &obj, CReportNode &report)
static void AddObjToQualMap(const string &qual, const string &val, CReportObj &obj, CReportNode &node)
map< const CReportObj *, CRef< CReportObj > > TReportObjPtrMap
static bool GetSubtypeStr(const string &qual, const string &val, const TReportObjectList &objs, string &subtype)
static void SetSubsource(CRef< CBioSource > bs, CSubSource::ESubtype st, const string &s, size_t &added, size_t &changed)
map< string, TStringObjVectorMap > TStringStringObjVectorMap
static void ConvertDuplicates(CReportNode &node)
map< string, vector< CRef< CReportObj > > > TStringObjVectorMap
static void SetOrgMod(CRef< CBioSource > bs, COrgMod::ESubtype st, const string &s, size_t &added, size_t &changed)
static float g_GetSesameStreetCutoff()
static size_t GetSortOrderId(const string &subitem, CReportNode &node)
static string OrderQual(const string &s)
static size_t GetNumOfObjects(CReportNode &root)
static void AddObjsToReport(const string &diagnosis, CReportNode::TNodeMap &all_objs, const string &qual, CReportNode &report)
static CS_CONTEXT * context