99 #include <objtools/readers/source_mod_parser.hpp>
101 #include <wx/msgdlg.h>
172 if (!
id || !id_col) {
178 while (row < id_col->GetData().GetSize() && !found) {
180 objects::CSeq_id::E_SIC compare =
id->Compare(*row_id);
181 if (compare == objects::CSeq_id::e_YES) {
183 }
else if (compare == objects::CSeq_id::e_DIFF) {
192 while (row < id_col->GetData().GetSize() && !found) {
194 objects::CBioseq_Handle bsh = seh.GetBioseqHandle (*row_id);
197 ITERATE (objects::CBioseq::TId, id_it,
b->GetId()) {
198 objects::CSeq_id::E_SIC compare =
id->Compare(**id_it);
199 if (compare == objects::CSeq_id::e_YES) {
203 }
else if (compare == objects::CSeq_id::e_DIFF) {
221 if (!input_table || !input_table->IsSetColumns() || input_table->GetColumns().size() < 1) {
225 bool all_default =
true;
227 ITERATE (objects::CSeq_table::TColumns, cit, input_table->SetColumns()) {
229 if (!(*cit)->IsSetHeader() || !(*cit)->GetHeader().IsSetTitle()
230 || !
NStr::Equal(expected_title, (*cit)->GetHeader().GetTitle())) {
237 bool all_match =
true;
239 NON_CONST_ITERATE (objects::CSeq_table::TColumns, cit, input_table->SetColumns()) {
240 if (!(*cit)->IsSetData() || (*cit)->GetData().GetSize() == 0) {
244 string first_val =
"";
245 if ((*cit)->GetData().IsString()) {
246 first_val = (*cit)->GetData().GetString()[0];
247 }
else if ((*cit)->GetData().IsId()) {
251 if (all_default || !(*cit)->IsSetHeader() || !(*cit)->GetHeader().IsSetTitle() ||
NStr::IsBlank((*cit)->GetHeader().GetTitle())) {
253 (*cit)->SetHeader().SetTitle(first_val);
254 }
else if (!
NStr::Equal((*cit)->GetHeader().GetTitle(), first_val)) {
268 NON_CONST_ITERATE (objects::CSeq_table::TColumns, cit, input_table->SetColumns()) {
269 if (!(*cit)->IsSetData() || (*cit)->GetData().GetSize() == 0) {
271 }
else if ((*cit)->GetData().IsString()) {
272 string title = (*cit)->GetHeader().GetTitle();
278 vector<CRef<objects::CSeq_id> > new_ids;
279 for (
size_t j = 0; j < (*cit)->GetData().GetString().
size(); j++) {
280 string val = (*cit)->GetData().GetString()[j];
283 new_ids.push_back(
id);
284 }
catch (exception &) {
286 id->SetLocal().SetStr(
val);
287 new_ids.push_back(
id);
290 for (
size_t j = 0; j < new_ids.size(); j++) {
291 (*cit)->SetData().SetId().push_back(new_ids[j]);
301 NON_CONST_ITERATE (objects::CSeq_table::TColumns, cit, input_table->SetColumns()) {
302 if (!(*cit)->IsSetData() || (*cit)->GetData().GetSize() == 0) {
304 }
else if ((*cit)->GetData().IsInt()) {
305 vector<string> values;
306 for (
size_t j = 0; j < (*cit)->GetData().GetInt().
size(); j++) {
309 for (
size_t j = 0; j < values.size(); j++) {
310 (*cit)->SetData().SetString().push_back(values[j]);
312 }
else if ((*cit)->GetData().IsReal()) {
313 vector<string> values;
314 for (
size_t j = 0; j < (*cit)->GetData().GetInt().
size(); j++) {
317 for (
size_t j = 0; j < values.size(); j++) {
318 (*cit)->SetData().SetString().push_back(values[j]);
333 return "No sequence ID column selected!";
338 vector<CRef<objects::CSeq_id> > already_seen;
339 while (row < src_col->GetData().GetSize()) {
340 if (!
OneRowOk(src_col->GetData().GetId()[
row], dst_col, seh)) {
341 string id_label =
"";
347 if ((*sit)->Compare(*(src_col->GetData().GetId()[
row])) == objects::CSeq_id::e_YES) {
350 +
" refer to the same sequence; ";
354 already_seen.push_back(src_col->GetData().GetId()[
row]);
367 ITERATE (objects::CSeq_table::TColumns, cit,
table.GetColumns()) {
368 if ((*cit)->IsSetData() && (*cit)->GetData().IsId()) {
379 if (!id_col || !
id || !id_col->IsSetData() || !id_col->GetData().IsId()) {
383 while (row < id_col->GetData().GetSize()) {
384 if (id->Compare(*(id_col->GetData().GetId()[
row])) == objects::CSeq_id::e_YES) {
397 for (
size_t i = 0;
i < src_id->GetData().GetSize() && i < src_col->GetData().GetSize();
i++) {
400 if (dst_col->GetData().GetString().size() >
row
402 && !
NStr::Equal(dst_col->GetData().GetString()[
row], src_col->GetData().GetString()[
i])) {
415 int conflicting_columns = 0;
418 ITERATE (objects::CSeq_table::TColumns, cit, src.GetColumns()) {
419 if ((*cit)->IsSetData() && !(*cit)->GetData().IsId()) {
423 conflicting_columns++;
428 return conflicting_columns;
435 if (!(*cit)->IsSetData() || (*cit)->GetData().GetSize() == 0) {
438 int num_vals =
static_cast<int>((*cit)->GetData().GetSize());
439 if (
row >= num_vals) {
442 if ((*cit)->GetData().IsString()) {
443 for (
int j =
row; j < num_vals - 1; j++) {
444 (*cit)->SetData().SetString()[j] = (*cit)->GetData().GetString()[j + 1];
446 (*cit)->SetData().SetString().pop_back();
447 }
else if ((*cit)->GetData().IsId()) {
448 for (
int j =
row; j < num_vals - 1; j++) {
449 (*cit)->SetData().SetId()[j]->Assign(*((*cit)->GetData().GetId()[j + 1]));
451 (*cit)->SetData().SetId().pop_back();
452 }
else if ((*cit)->GetData().IsLoc()) {
453 for (
int j =
row; j < num_vals - 1; j++) {
454 (*cit)->SetData().SetLoc()[j]->Assign(*((*cit)->GetData().GetLoc()[j + 1]));
456 (*cit)->SetData().SetLoc().pop_back();
457 }
else if ((*cit)->GetData().IsInt()) {
458 for (
int j =
row; j < num_vals - 1; j++) {
459 (*cit)->SetData().SetInt()[j] = (*cit)->GetData().GetInt()[j + 1];
461 (*cit)->SetData().SetInt().pop_back();
462 }
else if ((*cit)->GetData().IsReal()) {
463 for (
int j =
row; j < num_vals - 1; j++) {
464 (*cit)->SetData().SetReal()[j] = (*cit)->GetData().GetReal()[j + 1];
466 (*cit)->SetData().SetReal().pop_back();
467 }
else if ((*cit)->GetData().IsBit()) {
468 for (
int j =
row; j < num_vals - 1; j++) {
469 (*cit)->SetData().SetBit()[j] = (*cit)->GetData().GetBit()[j + 1];
471 (*cit)->SetData().SetBit().pop_back();
472 }
else if ((*cit)->GetData().IsBytes()) {
473 for (
int j =
row; j < num_vals - 1; j++) {
474 (*cit)->SetData().SetBytes()[j] = (*cit)->GetData().GetBytes()[j + 1];
476 (*cit)->SetData().SetBytes().pop_back();
488 for (
i = 0;
i < src_id->GetData().GetSize() && i < src_col->GetData().GetSize();
i++) {
491 while (dst_col->SetData().SetString().size() < dst_id->GetData().GetSize()) {
492 dst_col->SetData().SetString().push_back (
"");
495 dst_col->SetData().SetString()[
row] = src_col->GetData().GetString()[
i];
496 }
catch (exception &) {
512 ITERATE (objects::CSeq_table::TColumns, cit, src.GetColumns()) {
513 if ((*cit)->IsSetData() && !(*cit)->GetData().IsId()) {
518 dst_col =
new objects::CSeqTable_column();
519 dst_col->SetHeader().Assign((*cit)->GetHeader());
520 if (!dst_col->GetHeader().IsSetTitle() && dst_col->GetHeader().IsSetField_name()) {
521 dst_col->SetHeader().SetTitle(dst_col->GetHeader().GetField_name());
523 dst->SetColumns().push_back(dst_col);
535 wxFileDialog table_save_file(parent,
wxT(
"Select a file"), save_file_dir, save_file_name,
538 wxFD_SAVE|wxFD_OVERWRITE_PROMPT);
540 if (table_save_file.ShowModal() == wxID_OK)
542 wxString path = table_save_file.GetPath();
543 wxString name = table_save_file.GetFilename();
550 wxMessageBox(
wxT(
"Please, select file name"),
wxT(
"Error"),
551 wxOK | wxICON_ERROR, parent);
559 wxMessageBox(
wxT(
"Cannot open file ")+name,
wxT(
"Error"),
560 wxOK | wxICON_ERROR, parent);
565 ITERATE (objects::CSeq_table::TColumns, cit, values_table->GetColumns()) {
566 exporter.
Field((*cit)->GetHeader().GetTitle());
569 for (
int i = 0;
i < values_table->GetNum_rows(); ++
i) {
570 ITERATE (objects::CSeq_table::TColumns, cit, values_table->GetColumns()) {
571 if (
i < (*cit)->GetData().GetSize()) {
572 if ((*cit)->GetData().IsId()) {
576 }
else if ((*cit)->GetData().IsString()) {
577 exporter.
Field((*cit)->GetData().GetString()[
i]);
581 exporter.
Field(blank);
587 save_file_dir = table_save_file.GetDirectory();
588 save_file_name = table_save_file.GetFilename();
601 size_t num_rows = col->GetData().GetSize();
605 if (col->GetData().IsString()) {
606 string val = col->GetData().GetString()[0];
610 for (
size_t row = 1;
row < num_rows;
row++) {
611 string new_val = col->GetData().GetString()[
row];
613 new_val = default_val;
620 }
else if (col->GetData().IsInt()) {
621 int val = col->GetData().GetInt()[0];
622 for (
size_t row = 1;
row < num_rows;
row++) {
623 if (
val != col->GetData().GetInt()[
row]) {
636 if (!col || !col->IsSetData()) {
639 size_t num_rows = col->GetData().GetSize();
643 if (col->GetData().IsString()) {
644 for (
size_t row = 0;
row < num_rows;
row++) {
645 string new_val = col->GetData().GetString()[
row];
658 if (!col || !col->IsSetData()) {
661 size_t num_rows = col->GetData().GetSize();
665 if (col->GetData().IsString()) {
666 for (
size_t row = 0;
row < num_rows;
row++) {
667 string new_val = col->GetData().GetString()[
row];
680 if (!col || !col->IsSetData()) {
683 size_t num_rows = col->GetData().GetSize();
687 if (col->GetData().IsString()) {
688 for (
size_t row = 0;
row < num_rows;
row++) {
689 string new_val = col->GetData().GetString()[
row];
703 if (ival.GetFrom() > 3) {
704 ival.SetFrom(ival.GetFrom() - 3);
709 size_t len = bsh.GetBioseqLength();
710 if (ival.GetTo() <
len - 4) {
711 ival.SetTo(ival.GetTo() + 3);
724 objects::CBioseq_Handle bsh_match;
725 bool id_match =
false;
730 ITERATE (objects::CBioseq::TId, id_it, protein->GetSeq().GetId()) {
731 bsh_match = seh.GetScope().GetBioseqHandle(**id_it);
740 for (objects::CBioseq_CI b_iter(seh, objects::CSeq_inst::eMol_na); b_iter ; ++b_iter ) {
748 if (nuc_count == 0) {
749 wxMessageBox(
wxT(
"You must import nucleotide sequences before importing protein sequences"),
wxT(
"Error"),
750 wxOK | wxICON_ERROR,
NULL);
752 }
else if (nuc_count > 1) {
753 wxMessageBox(
wxT(
"If you have more than one nucleotide sequence, each protein sequence must use the ID of the nucleotide sequence where the coding region is found."),
wxT(
"Error"),
754 wxOK | wxICON_ERROR,
NULL);
760 seq_id->Assign(*(bsh_match.GetSeqId()));
764 protein_entry->Assign(*protein);
769 protein_entry->SetSeq().ResetId();
770 protein_entry->SetSeq().SetId().push_back(product_id);
773 objects::CSeq_entry_Handle protein_h = seh.GetScope().AddTopLevelSeqEntry(*protein_entry);
781 bool found_start_codon =
false;
782 bool found_stop_codon =
false;
783 if (alignment && alignment->IsSetSegs() && alignment->GetSegs().IsSpliced()) {
785 seq_id->Assign(*match_loc->GetId());
786 ITERATE (objects::CSpliced_seg::TExons, exon_it, alignment->GetSegs().GetSpliced().GetExons()) {
788 (*exon_it)->GetGenomic_start(),
789 (*exon_it)->GetGenomic_end()));
790 if ((*exon_it)->IsSetGenomic_strand()) {
791 exon->SetStrand((*exon_it)->GetGenomic_strand());
793 cds_loc->SetMix().Set().push_back(exon);
795 ITERATE (objects::CSpliced_seg::TModifiers, mod_it,
796 alignment->GetSegs().GetSpliced().GetModifiers()) {
797 if ((*mod_it)->IsStart_codon_found()) {
798 found_start_codon = (*mod_it)->GetStart_codon_found();
800 if ((*mod_it)->IsStop_codon_found()) {
801 found_stop_codon = (*mod_it)->GetStop_codon_found();
806 if (!cds_loc->IsMix()) {
810 string error =
"Unable to find coding region location for protein sequence " +
label +
". Import failed.";
812 wxOK | wxICON_ERROR,
NULL);
815 if (cds_loc->GetMix().Get().size() == 1) {
817 cds_loc->Assign(*exon);
820 if (!found_start_codon) {
823 if (found_stop_codon) {
825 if (cds_loc->IsMix()) {
835 seh.GetScope().RemoveTopLevelSeqEntry(protein_h);
842 cmd->AddCommand(*add_seqentry);
844 new_cds->SetLocation(*cds_loc);
845 if (partial5 || partial3) {
846 new_cds->SetPartial(
true);
848 new_cds->SetData().SetCdregion();
850 product_id->Assign(*(protein_entry->GetSeq().GetId().front()));
851 new_cds->SetProduct().SetWhole(*product_id);
853 cmd->AddCommand(*add_cds);
861 wxMessageBox(
wxT(
"Cannot import a mix of protein and nucleotide sequences unless proteins are already packaged in nuc-prot sets"),
wxT(
"Error"),
862 wxOK | wxICON_ERROR,
NULL);
871 if (entry->IsSet()) {
872 NON_CONST_ITERATE(objects::CBioseq_set::TSeq_set, it, entry->SetSet().SetSeq_set()) {
875 }
else if (entry->IsSeq()) {
876 if (!entry->GetSeq().IsAa()) {
878 objects::CSourceModParser
smp(
879 objects::CSourceModParser::eHandleBadMod_Ignore );
882 = entry->GetSeq().GetClosestDescriptor(objects::CSeqdesc::e_Title);
884 string& title(
const_cast<string&
>(title_desc->GetTitle()));
886 smp.ApplyAllMods(entry->SetSeq());
898 if (entry->IsSeq()) {
899 if (entry->GetSeq().IsAa()) {
913 new_entry->Assign(*entry);
916 cmd->AddCommand(*subcmd);
919 }
else if (entry->IsSet()) {
920 if (entry->GetSet().GetClass() == objects::CBioseq_set::eClass_nuc_prot) {
926 new_entry->Assign(*entry);
929 cmd->AddCommand(*subcmd);
932 if (seh.IsSet() && seh.GetSet().IsEmptySeq_set() && entry->GetSet().IsSetClass()) {
934 new_set->Assign(*(seh.GetSet().GetCompleteBioseq_set()));
935 new_set->SetClass(entry->GetSet().GetClass());
937 cmd->AddCommand(*set_cmd);
939 if (seh.IsSet() && seh.GetSet().IsEmptySeq_set() && entry->GetSet().IsSetDescr()) {
940 ITERATE (objects::CBioseq_set::TDescr::Tdata, dit, entry->GetSet().GetDescr().Get()) {
944 cmd->AddCommand(*cmdAddDesc);
948 ITERATE (objects::CBioseq_set::TSeq_set, it, entry->GetSet().GetSeq_set()) {
950 new_entry->Assign(**it);
951 if (entry->GetSet().IsSetDescr()) {
952 ITERATE (objects::CBioseq_set::TDescr::Tdata, dit, entry->GetSet().GetDescr().Get()) {
955 new_entry->SetDescr().Set().push_back(desc);
976 bool same_num =
true;
979 objects::CBioseq_CI b_iter(entry, objects::CSeq_inst::eMol_na);
980 for ( ; b_iter ; ++b_iter ) {
982 objects::CFeat_CI fit (*b_iter);
988 counts.push_back(this_count);
989 if (this_count == 0) {
994 if (common_num == -1) {
995 common_num = this_count;
996 }
else if (common_num != this_count) {
1000 string summary =
"";
1002 if (
all && same_num) {
1005 vector<int>::iterator cit = counts.begin();
1006 while (cit != counts.end()) {
1008 vector<int>::iterator cit2 = cit;
1010 while (cit2 != counts.end()) {
1011 if (*cit == *cit2) {
1013 cit2 = counts.erase(cit2);
1019 if (this_num == 1) {
1022 summary +=
"s have ";
1033 summary =
"No features found.";
1043 vector<string> tf_strings;
1044 tf_strings.push_back(
"true");
1045 tf_strings.push_back(
"");
1076 if (choices.size() != 2) {
1092 if (
table->GetColumns().front()->IsSetData()) {
1093 row =
static_cast<int>(
table->GetColumns().front()->GetData().GetId().size());
1097 id->Assign(*
f.GetLocation().GetId());
1098 table->SetColumns()[0]->SetData().SetId().push_back(
id);
1101 if (
f.IsSetId() &&
f.GetId().IsLocal() &&
f.GetId().GetLocal().IsId()) {
1111 if (
f.IsSetComment()) {
1114 if (
f.IsSetExcept() &&
f.IsSetExcept_text()) {
1117 if (
f.IsSetQual()) {
1118 ITERATE(objects::CSeq_feat::TQual, it,
f.GetQual()) {
1119 if ((*it)->IsSetQual() && (*it)->IsSetVal()) {
1120 string qual = (*it)->GetQual();
1121 string val = (*it)->GetVal();
1150 if (!(*it)->RelaxedMatch(f1, f2)) {
1163 id_col->SetHeader().SetField_id(objects::CSeqTable_column_info::eField_id_location_id);
1165 table->SetColumns().push_back(id_col);
1169 objects::CFeat_CI fi(entry, objects::SAnnotSelector(feat.GetData().GetSubtype()));
1184 objects::CSeq_entry_Handle entry,
1185 objects::CSeqFeatData::ESubtype subtype,
1197 if (featid_col && featid_col->IsSetData() && featid_col->GetData().GetSize() >
row) {
1198 string feature_id = featid_col->GetData().GetString()[
row];
1207 feat->Assign(*(orig_feat.GetOriginalSeq_feat()));
1210 feat->SetId().SetLocal().SetId(
id);
1212 }
catch (exception &) {
1217 feat->SetLocation().SetInt().SetId().Assign(*(id_col->GetData().GetId()[
row]));
1218 int start = start_col->GetData().GetInt()[
row];
1219 int stop = stop_col->GetData().GetInt()[
row];
1221 feat->SetLocation().SetInt().SetFrom(stop - 1);
1222 feat->SetLocation().SetInt().SetTo(start - 1);
1225 feat->SetLocation().SetInt().SetFrom(start - 1);
1226 feat->SetLocation().SetInt().SetTo(stop - 1);
1233 && col->GetData().GetSize() >
row) {
1234 if (col->GetData().IsString()) {
1236 }
else if (col->GetData().IsInt()) {
1239 wxMessageBox(
wxT(
"Unrecognized table column type"),
wxT(
"Error"),
1240 wxOK | wxICON_ERROR,
NULL);
1245 objects::CSeqFeatData::E_Choice feature_type = objects::CSeqFeatData::GetTypeFromSubtype(subtype);
1246 switch (feature_type) {
1247 case objects::CSeqFeatData::e_Imp:
1249 const objects::CFeatList& feats(*objects::CSeqFeatData::GetFeatList());
1250 string key = feats.GetStoragekey(subtype);
1251 feat->SetData().SetImp().SetKey(
key);
1254 case objects::CSeqFeatData::e_Cdregion:
1255 feat->SetData().SetCdregion();
1261 bool change_existing =
false;
1262 if (feat->IsSetId() && feat->GetId().IsLocal() && feat->GetId().GetLocal().IsId()) {
1264 feat->GetId().GetLocal().GetId());
1267 change_existing =
true;
1270 if (!change_existing) {
1271 objects::CBioseq_Handle bh = entry.GetScope().GetBioseqHandle(*(id_col->GetData().GetId()[
row]));
1272 objects::CSeq_entry_Handle seh = bh.GetSeq_entry_Handle();
1286 vecColEditFactories.push_back(
1290 for (
size_t c = 0; c <
table->GetColumns().
front()->GetData().GetSize(); c++) {
1301 }
else if (problems.empty()) {
1302 problems.assign(add.begin(), add.end());
1305 while (pos < problems.size() && pos < add.size()) {
1308 problems[pos] +=
", ";
1310 problems[pos] += add[pos];
1314 while (pos < add.size()) {
1315 problems.push_back(add[pos]);
1325 vector<string>& problems)
1327 vector<string> add_values;
1328 for (
size_t j = 0; j < val_col->GetData().GetSize(); j++) {
1329 add_values.push_back(val_col->GetData().GetString()[j]);
1331 vector<string> add_problems = rule_col->
IsValid(add_values);
1338 auto num_table_columns = values_table->GetColumns().size();
1339 if (num_table_columns > 1) {
1341 if (!last_col->IsSetHeader()
1342 || !last_col->GetHeader().IsSetTitle()
1344 values_table->SetColumns().pop_back();
1353 if (
MatchColumnName((*col_it)->GetLabel(), col->GetHeader().GetTitle())) {
1358 if (
MatchColumnName((*col_it)->GetLabel(), col->GetHeader().GetTitle())) {
1369 vector<string> problems;
1373 if (
table->GetColumns()[
i]->IsSetData()
1374 && (
table->GetColumns()[
i]->GetData().IsString())) {
1384 problems_col->SetHeader().SetTitle(
kProblems);
1385 problems_col->SetData().SetString();
1386 table->SetColumns().push_back(problems_col);
1387 ITERATE(vector<string>, it, problems) {
1388 problems_col->SetData().SetString().push_back(*it);
1395 if (feat.IsSetId() && feat.GetId().IsLocal() && feat.GetId().GetLocal().IsId()) {
1407 objects::CFeat_CI fi(entry, objects::SAnnotSelector(feat.GetData().GetSubtype()));
1409 const objects::CSeq_feat& this_feat= *(fi->GetSeq_feat());
1421 vector<wxString> exts;
1422 exts.push_back(
wxT(
"asn"));
1423 exts.push_back(
wxT(
"sqn"));
1425 vector<wxString>::const_iterator it;
1426 for (it = exts.begin(); it != exts.end(); it++) {
1427 if (it != exts.begin())
1430 #ifdef NCBI_OS_MSWIN
1431 filter +=
wxT(
"*.") + *it;
1436 filter +=
wxT(
"*.") + *it;
1465 objects::CSeq_entry_EditHandle eh = entry.GetEditHandle();
1467 for (objects::CSeqdesc_CI desc_ci( entry, objects::CSeqdesc::e_User);
1470 const objects::CUser_object& u = desc_ci->GetUser();
1474 new_user->Assign (u);
1475 new_desc->SetUser(*new_user);
1476 objects::CUser_field& new_field = new_user->SetField(field_name);
1477 new_field.SetData().SetStr(
value);
1478 eh.RemoveSeqdesc(*desc_ci);
1479 eh.AddSeqdesc(*new_desc);
1487 new_desc->SetUser(*new_user);
1488 objects::CUser_field& new_field = new_user->SetField(field_name);
1489 new_field.SetData().SetStr(
value);
1490 entry.GetEditHandle().SetDescr().Set().push_back(new_desc);
1519 ITERATE(objects::CUser_object::TData, fit, user.GetData()) {
1520 if ((*fit)->IsSetLabel()
1521 && (*fit)->GetLabel().IsStr()
1523 && (*fit)->IsSetData()
1524 && (*fit)->GetData().IsStr()) {
1525 value = (*fit)->GetData().GetStr();
1537 string alt_email =
"";
1541 if (pos != string::npos) {
1551 string alt_email =
"";
1552 objects::CSeqdesc_CI it (entry, objects::CSeqdesc::e_User);
1567 objects::CUser_field& new_field = u.SetField(
kSMARTComment);
1568 new_field.SetData().SetStr(alt_email);
1575 objects::CSeq_entry_EditHandle eh = entry.GetEditHandle();
1576 for (objects::CSeqdesc_CI desc_ci( entry, objects::CSeqdesc::e_User);
1579 const objects::CUser_object& u = desc_ci->GetUser();
1583 new_user->Assign (u);
1584 new_desc->SetUser(*new_user);
1586 eh.RemoveSeqdesc(*desc_ci);
1587 eh.AddSeqdesc(*new_desc);
1595 new_desc->SetUser(*new_user);
1597 entry.GetEditHandle().SetDescr().Set().push_back(new_desc);
1608 if (pos == string::npos) {
1611 string tmp = email.substr(pos);
1613 if (pos == string::npos) {
1622 (
const objects::CSeq_entry& entry,
1630 if (entry.IsSeq()) {
1632 if (entry.GetSeq().IsAa()) {
1636 if (entry.GetSeq().IsSetDescr()) {
1637 ITERATE (objects::CBioseq::TDescr::Tdata, desc_it, entry.GetSeq().GetDescr().Get()) {
1638 if ((*desc_it)->IsTitle()) {
1639 title = (*desc_it)->GetTitle();
1644 ITERATE(objects::CBioseq::TId, id_it, entry.GetSeq().GetId()) {
1646 id->Assign(**id_it);
1647 id_col->SetData().SetId().push_back(
id);
1648 pos_col->SetData().SetInt().push_back(pos);
1649 np_col->SetData().SetString().push_back(np);
1650 title_col->SetData().SetString().push_back(title);
1654 }
else if (entry.IsSet() && entry.GetSet().IsSetSeq_set()) {
1655 ITERATE(objects::CBioseq_set::TSeq_set, seq_it, entry.GetSet().GetSeq_set()) {
1666 id_col->SetData().SetId();
1674 pos_col->SetHeader().SetTitle(
"Pos");
1675 pos_col->SetData().SetInt();
1683 np_col->SetHeader().SetTitle(
"NucOrProt");
1684 np_col->SetData().SetString();
1692 title_col->SetHeader().SetTitle(
"Title");
1693 title_col->SetData().SetString();
1704 table->SetColumns().push_back(id_col);
1709 table->SetColumns().push_back(pos_col);
1714 table->SetColumns().push_back(np_col);
1719 table->SetColumns().push_back(title_col);
1724 pos = pos_col->GetData().GetInt()[
row - 1] + 1;
1735 table->SetColumns().push_back(id_col);
1737 table->SetColumns().push_back(pos_col);
1739 table->SetColumns().push_back(np_col);
1741 table->SetColumns().push_back(title_col);
1760 if (id_col->GetData().IsString()) {
1761 str = id_col->GetData().GetString()[pos];
1772 problems->SetHeader().SetTitle(
"Problems");
1773 problems->SetData().SetString();
1774 vector<string> id_strings;
1775 bool any_problems =
false;
1776 bool any_nuc =
false;
1781 id_col = orig_id_col;
1786 for (
int i = 0;
i < new_ids->GetNum_rows();
i++) {
1788 id_strings.push_back(
str);
1793 problems->SetData().SetString().push_back(
kIdMissing);
1794 any_problems =
true;
1795 }
else if (
str.length() > max_len) {
1796 problems->SetData().SetString().push_back(
kIdTooLong);
1797 any_problems =
true;
1799 problems->SetData().SetString().push_back(
"");
1807 for (
unsigned int i = 0;
i < id_col->GetData().GetSize();
i++) {
1810 id_strings.push_back(
str);
1814 if (dup_ids.size() > 0) {
1815 for (
int i = 0;
i < new_ids->GetNum_rows();
i++) {
1817 ITERATE (vector<string>, s_it, dup_ids) {
1824 string val = problems->GetData().GetString()[
i];
1829 problems->SetData().SetString()[
i] =
val;
1830 any_problems =
true;
1835 if (!any_problems) {
1845 int num_duplicates = 0;
1846 int num_too_long = 0;
1849 ITERATE(objects::CSeqTable_column::TData::TString, s_it, problems->GetData().GetString()) {
1859 if (num_duplicates > 0) {
1861 if (num_too_long > 0) {
1865 if (num_too_long > 0) {
1875 bool any_changes =
false;
1881 new_str->SetHeader().SetTitle(
"new_id");
1882 new_str->SetData().SetString();
1884 size_t num_seq = pos_col->GetData().GetInt().back();
1886 if (old_pos_col && old_pos_col->GetData().GetInt().size() > 0) {
1887 num_seq += old_pos_col->GetData().GetInt().back();
1890 int reserve_char = ceil(
log ((
double)num_seq)) + 1;
1892 vector<string> id_strings;
1895 for (
int i = 0;
i < new_ids->GetNum_rows();
i++) {
1898 if (
str.length() > max_len) {
1899 str =
str.substr(0, max_len - reserve_char);
1900 new_str->SetData().SetString().push_back(
str);
1903 new_str->SetData().SetString().push_back(
"");
1905 id_strings.push_back(
str);
1910 for (
unsigned int i = 0;
i < id_col->GetData().GetSize();
i++) {
1913 id_strings.push_back(
str);
1917 if (dup_ids.size() > 0) {
1918 vector<int> high_offset;
1919 ITERATE (vector<string>, s_it, dup_ids) {
1920 high_offset.push_back(0);
1922 ITERATE(vector<string>, i_it, id_strings) {
1924 ITERATE (vector<string>, s_it, dup_ids) {
1928 if (
offset > high_offset[dup_pos]) {
1929 high_offset[dup_pos] =
offset;
1931 }
catch (exception &) {
1939 for (
int i = 0;
i < new_ids->GetNum_rows();
i++) {
1941 ITERATE (vector<string>, s_it, dup_ids) {
1943 high_offset[dup_pos]++;
1964 seq_id_it; ++seq_id_it)
1966 if( seq_id_it->Equals(*seq_id) ) {
1967 seq_id_it->Assign(*new_seq_id);
1983 objects::CSeq_entry *pEntry = bioseq.GetParentEntry();
1984 for( ; pEntry; pEntry = pEntry->GetParentEntry() ) {
1985 if( ! pEntry->IsSet() ) {
1988 objects::CBioseq_set & bioseq_set = pEntry->SetSet();
1989 if( ! bioseq_set.IsSetAnnot() ) {
1992 NON_CONST_ITERATE(objects::CBioseq_set::TAnnot, annot_it, bioseq_set.SetAnnot() ) {
2004 if (entry.IsSeq()) {
2006 objects::CSeqTable_column::TData::TId::iterator orig_it = id_col->SetData().SetId().begin();
2007 objects::CSeqTable_column::TData::TString::iterator new_it = new_id_col->SetData().SetString().begin();
2008 while(orig_it != id_col->SetData().SetId().end() && new_it != new_id_col->SetData().SetString().end()) {
2009 if ((*id_it)->Compare(**orig_it) == objects::CSeq_id::e_YES) {
2011 new_id->SetLocal().SetStr(*new_it);
2012 (*id_it)->Assign(*new_id);
2015 orig_it = id_col->SetData().SetId().erase(orig_it);
2016 new_it = new_id_col->SetData().SetString().erase(new_it);
2024 }
else if (entry.IsSet()) {
2025 NON_CONST_ITERATE(objects::CBioseq_set::TSeq_set, it, entry.SetSet().SetSeq_set()) {
2035 bool already_got_one =
false;
2036 objects::CFeat_CI
f(bh, objects::SAnnotSelector(objects::CSeqFeatData::e_Imp));
2038 if (
f->GetData().GetImp().IsSetKey()
2041 || (
f->IsSetComment() &&
NStr::Equal(
f->GetComment(), comment)))) {
2042 already_got_one =
true;
2047 return already_got_one;
2053 bool already_got_one =
false;
2054 objects::CFeat_CI
f(bh, objects::SAnnotSelector(subtype));
2056 already_got_one =
true;
2058 return already_got_one;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CRef< objects::CSeq_id > GetNewProtId(objects::CBioseq_Handle bsh, int &offset, string &id_label, bool general_only)
void Field(const string &value)
static CRef< CFeatureSeqTableColumnBase > Create(const string &sTitle, objects::CSeqFeatData::ESubtype subtype=objects::CSeqFeatData::eSubtype_any)
virtual vector< string > IsValid(const vector< string > &values)
static wxString GetDialogFilter(EFileType fileType)
Output filtering parameters.
@ ePassThrough
all zeroes - no filtering
spliced protein to genomic alignment
CRef< objects::CSeq_align > FindAlignment(objects::CScope &scope, const objects::CSeq_id &protein, const objects::CSeq_loc &genomic, CProSplignOutputOptions output_options=CProSplignOutputOptions())
Aligns protein to a region on genomic sequence.
@ eWizardType_uncultured_samples
@ eWizardType_microsatellite
@ eWizardType_rrna_its_igs
Template class for iteration on objects of class C.
void SetMolinfoForProtein(CRef< objects::CSeq_entry > protein, bool partial5, bool partial3)
CRef< objects::CSeq_feat > AddProteinFeatureToProtein(CRef< objects::CSeq_entry > protein, bool partial5, bool partial3)
std::ofstream out("events_result.xml")
main entry point for tests
vector< CRef< CFeatureSeqTableColumnBase > > TFeatureSeqTableColumnList
static const int kNumSatelliteTypes
const string kPartialStart
static const string kSatelliteTypes[]
const string kPartialStop
const string kStopColLabel
const string kStartColLabel
const char * kSequenceIdColLabel
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
@ eNocase
Case insensitive compare.
static const char label[]
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
static const int kSkipColumns
bool MatchColumnName(string name1, string name2)
CRef< CSeqTable_column > FindSeqTableColumnByName(CRef< objects::CSeq_table > values_table, string column_name)
CRef< CSeqTable_column > AddIntColumnToTable(CRef< CSeq_table > table, string label)
CRef< CSeqTable_column > AddStringColumnToTable(CRef< CSeq_table > table, string label)
void AddValueToTable(CRef< CSeq_table > table, string subtype_name, string value, size_t row, edit::EExistingText existing_text=edit::eExistingText_replace_old)
bool RelaxedMatch(CRef< objects::CSeq_id > id1, CRef< objects::CSeq_id > id2)
#define row(bind, expected)
@ eExistingText_replace_old
int CombineTables(CRef< objects::CSeq_table > dst, const objects::CSeq_table &src)
bool AreAnyColumnValuesMissing(CRef< objects::CSeqTable_column > col)
int CountColumnRowConflicts(CRef< objects::CSeqTable_column > dst_id, CRef< objects::CSeqTable_column > dst_col, CRef< objects::CSeqTable_column > src_id, CRef< objects::CSeqTable_column > src_col)
int AddFeatureToSeqTable(const objects::CSeq_feat &f, CRef< objects::CSeq_table > table)
static CRef< objects::CSeqTable_column > s_MakeTitleCol()
static CRef< objects::CSeqTable_column > s_MakePosCol()
bool AddSeqEntryToSeqEntry(const objects::CSeq_entry *entry, objects::CSeq_entry_Handle seh, CRef< CCmdComposite > cmd, bool &has_nuc, bool &has_prot, bool create_general_only)
bool SaveTableFile(wxWindow *parent, wxString &save_file_dir, wxString &save_file_name, CRef< objects::CSeq_table > values_table)
CSourceRequirements::EWizardType GetWizardTypeFromName(string wizard_name)
int CountTableColumnConflicts(CRef< objects::CSeq_table > dst, const objects::CSeq_table &src)
CRef< objects::CUser_object > MakeWizardObject()
CRef< objects::CUser_object > MakeBankItSubmissionObject()
bool IsValidEmail(string email)
CRef< objects::CSeq_table > GetIdsFromSeqEntry(const objects::CSeq_entry &entry)
const string kSMARTComment
bool IsWizardObject(const objects::CUser_object &user)
static TWizardName s_WizardNames[]
bool IsBankItSubmissionObject(const objects::CUser_object &user)
CRef< CCmdComposite > AddFeatureSeqTableToSeqEntry(CRef< objects::CSeq_table > table, objects::CSeq_entry_Handle entry, objects::CSeqFeatData::ESubtype subtype, const TFeatureSeqTableColumnList &reqs, const TFeatureSeqTableColumnList &opts)
CRef< CCmdComposite > RemoveMatchingFeaturesWithoutLocalIntFeatureIdsFromSeqEntry(objects::CSeq_entry_Handle entry, const objects::CSeq_feat &feat, const TFeatureSeqTableColumnList &reqs)
static void AddProblemsToColumn(CRef< objects::CSeqTable_column > val_col, CRef< CFeatureSeqTableColumnBase > rule_col, vector< string > &problems)
void ParseTitlesToNewSeqEntries(CRef< objects::CSeq_entry > entry)
void AddProblemsColumnToFeatureSeqTable(CRef< objects::CSeq_table > table, const TFeatureSeqTableColumnList &reqs, const TFeatureSeqTableColumnList &opts)
bool IsSynonymForFalse(const string &val)
static CRef< objects::CSeqTable_column > s_MakeIdTableIdCol()
bool AreAnyColumnValuesPresent(CRef< objects::CSeqTable_column > col, string default_val)
static CRef< objects::CSeqTable_column > s_MakeNpCol()
void AddFeatureSeqTableRowToSeqEntry(CRef< objects::CSeq_table > table, objects::CSeq_entry_Handle entry, objects::CSeqFeatData::ESubtype subtype, unsigned int row, TFeatureSeqTableColumnList &vecColEditFactories, CRef< CCmdComposite > cmd)
void SetWizardFieldInSeqEntryNoUndo(objects::CSeq_entry_Handle entry, string field_name, string value)
string GetDescAlternateEmailAddress(const objects::CSeqdesc &desc)
wxString GetAsnSqnExtensions()
static void TransformSeqAnnot(objects::CSeq_annot &annot, CRef< objects::CSeq_id > seq_id, CRef< objects::CSeq_id > new_seq_id)
int FindRowForSeqId(CRef< objects::CSeqTable_column > id_col, CRef< objects::CSeq_id > id)
static void AddIdsFromSeqEntryToTable(const objects::CSeq_entry &entry, CRef< objects::CSeqTable_column > id_col, CRef< objects::CSeqTable_column > pos_col, CRef< objects::CSeqTable_column > np_col, CRef< objects::CSeqTable_column > title_col, int &pos, size_t &row)
CRef< CFeatureSeqTableColumnBase > GetColumnRuleForFeatureSeqTable(CRef< objects::CSeqTable_column > col, const TFeatureSeqTableColumnList &reqs, const TFeatureSeqTableColumnList &opts)
const string kBankItAltEmailPrefix
CRef< objects::CSeqTable_column > FindSeqIDColumn(const objects::CSeq_table &table)
bool IsSynonymForTrue(const string &val)
static void s_ReportMixError()
string SummarizeIdProblems(CRef< objects::CSeqTable_column > problems)
void DeleteTableRow(CRef< objects::CSeq_table > table, int row)
static void TransformSeqAnnots(objects::CBioseq &bioseq, CRef< objects::CSeq_id > seq_id, CRef< objects::CSeq_id > new_seq_id)
string GetAlternateEmailAddress(objects::CSeq_entry_Handle entry)
static int k_NumWizardNames
static bool OneRowOk(CRef< objects::CSeq_id > id, CRef< objects::CSeqTable_column > id_col, objects::CSeq_entry_Handle seh)
bool AlreadyHasFeature(objects::CBioseq_Handle bh, string key, string comment)
void RemoveProblemsColumn(CRef< objects::CSeq_table > values_table)
void SetAlternateEmailAddress(objects::CSeq_entry_Handle entry, string alt_email)
bool DoesColumnHaveValue(CRef< objects::CSeqTable_column > col, string val)
static void s_ExtendIntervalToEnd(objects::CSeq_interval &ival, objects::CBioseq_Handle bsh)
string GetFieldFromWizardObject(const objects::CUser_object &user, string field_name)
bool AreAllColumnValuesTheSame(CRef< objects::CSeqTable_column > col, string default_val)
void ApplyReplacementIds(objects::CSeq_entry &entry, CRef< objects::CSeq_table > table)
const string kIdDuplicate
vector< string > GetTrueFalseList()
string CheckFeatureAnnotation(objects::CSeq_entry_Handle entry, bool &is_ok)
bool AddProteinToSeqEntry(const objects::CSeq_entry *protein, objects::CSeq_entry_Handle seh, CRef< CCmdComposite > cmd, bool create_general_only)
void FixTableAfterImport(CRef< objects::CSeq_table > input_table)
const string kBankitSubmissionLabel
string FindBadRows(CRef< objects::CSeq_table > src, CRef< objects::CSeq_table > dst, objects::CSeq_entry_Handle seh)
static void CombineColumns(CRef< objects::CSeqTable_column > dst_id, CRef< objects::CSeqTable_column > dst_col, CRef< objects::CSeqTable_column > src_id, CRef< objects::CSeqTable_column > src_col)
CRef< objects::CSeq_table > GetFeaturesFromSeqEntry(objects::CSeq_entry_Handle entry, const objects::CSeq_feat &feat, const TFeatureSeqTableColumnList &reqs)
bool IsTrueFalseList(const vector< string > &choices)
static bool s_OkToAddFeature(const objects::CSeq_feat &f1, const objects::CSeq_feat &f2, const TFeatureSeqTableColumnList &reqs)
void SetUserAlternateEmailAddress(objects::CUser_object &u, string alt_email)
CRef< objects::CSeqTable_column > GetSeqIdProblems(CRef< objects::CSeq_table > new_ids, CRef< objects::CSeq_table > old_ids, size_t max_len)
bool HasLocalIntFeatureId(const objects::CSeq_feat &feat)
static string GetIdValueFromColumn(CRef< objects::CSeqTable_column > id_col, int pos)
CRef< objects::CSeqTable_column > GetReplacementSeqIds(CRef< objects::CSeq_table > new_ids, CRef< objects::CSeq_table > old_ids, size_t max_len)
void MergeStringVectors(vector< string > &problems, vector< string > add)
TWizardNameList GetWizardNameList()
vector< TWizardName > TWizardNameList
vector< string > FindNonUniqueStrings(const vector< string > &values)
pair< CSourceRequirements::EWizardType, string > TWizardName
const string kWizardLabel
wxString ToWxString(const string &s)