112 using namespace ncbi;
115 using namespace biosample_util;
133 const string& bioSampleAcc)
137 bioSampleAcc,
false,
nullptr);
151 for (
auto pSourceDesc: pDescriptorSet.
Get()) {
152 const CSeqdesc& sourceDesc = *pSourceDesc;
157 bioSample, sourceDesc.
GetSource(), diffs);
165 const string& fileName)
168 unique_ptr<CNcbiIfstream> pInStr(
new CNcbiIfstream(fileName.c_str(), ios::binary));
184 const string& bioSampleAcc,
190 cerr <<
"Differ: Unable to load biosample with given accession." <<
"\n";
201 const string& bioSampleAcc,
210 bioSampleAcc, bioSource, sampleSource, diffs)) {
219 const string& bioSampleFile,
234 const string& inFile)
237 unique_ptr<CNcbiIfstream> pInStr(
new CNcbiIfstream(inFile.c_str(), ios::binary));
258 virtual void Init(
void);
259 virtual int Run (
void);
262 int xCompareSeqEntry(
264 int xCompareSeqEntryAccession(
266 int xCompareSeqEntryAccessionList(
268 int xCompareSeqEntryFile(
276 vector<string> xGetBioSampleAccs(
294 mpScope.Reset(
new CScope(*mpObjmgr));
295 mpScope->AddDefaults();
305 arg_desc->AddDefaultKey(
308 "BioSource info to compare",
312 arg_desc->AddDefaultKey(
314 "biosample_accession",
315 "biosample, retrieve online by accession",
319 arg_desc->AddDefaultKey(
321 "biosample_local_filename",
322 "biosample, retrieve locally from filename",
326 arg_desc->AddDefaultKey(
328 "seq_entry_accession",
329 "seq-entry, to be used for both biosource and biosample",
333 arg_desc->AddDefaultKey(
334 "seq-entry-acc-list",
335 "seq_entry_accession_list",
336 "accession_list, containing multiple seq entry accessions",
340 arg_desc->AddDefaultKey(
343 "file, containing seq entry in ASN.1 format",
347 string prog_description =
"BioSample Checker\n";
348 arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
349 prog_description,
false);
352 SetupArgDescriptions(arg_desc.release());
363 string bioSampleAcc = args[
"biosample-acc"].AsString();
364 string bioSampleFile = args[
"biosample-file"].AsString();
365 string bioSourceFile = args[
"biosource"].AsString();
366 string seqEntryAcc = args[
"seq-entry-acc"].AsString();
367 string seqEntryAccList = args[
"seq-entry-acc-list"].AsString();
368 string seqEntryFile = args[
"seq-entry-file"].AsString();
370 if (seqEntryAcc.empty() && seqEntryAccList.empty() && seqEntryFile.empty()) {
371 if (bioSampleAcc.empty() && bioSampleFile.empty()) {
372 cerr <<
"Bad arguments: Need to uniquely specify biosample." << endl;
375 if (!bioSampleAcc.empty() && !bioSampleFile.empty()) {
376 cerr <<
"Bad arguments: Need to uniquely specify biosample." << endl;
379 if (bioSourceFile.empty()) {
380 cerr <<
"Bad arguments: Need to supply biosource." << endl;
385 if (!seqEntryFile.empty() && !seqEntryAcc.empty()) {
386 auto otherStuff = bioSampleAcc + bioSampleFile + bioSourceFile;
387 if (!otherStuff.empty()) {
388 cerr <<
"Bad arguments: seq-entry-acc or seq-entry-file cannot go with anything else."
394 if (!seqEntryAcc.empty()) {
395 return xCompareSeqEntryAccession(seqEntryAcc);
398 if (!seqEntryAccList.empty()) {
399 return xCompareSeqEntryAccessionList(seqEntryAccList);
402 if (!seqEntryFile.empty()) {
403 return xCompareSeqEntryFile(seqEntryFile);
407 if (!bioSampleAcc.empty()) {
410 if (!bioSampleFile.empty()) {
413 cerr <<
"Internal error: utility completed without doing anything." << endl;
421 const string& accession)
425 mpScope->ResetDataAndHistory();
443 const string& filename)
449 while (!ifstr.eof()) {
450 std::getline(ifstr, accession);
451 if (!accession.empty()) {
453 cout <<
"Differ: Processing accession \"" << accession <<
"\" ("
454 << counter <<
") ---" << endl << endl;
455 if (!xCompareSeqEntryAccession(accession)) {
466 const string& seqEntryFile)
472 bool bDeleteOnClose =
false;
473 pInputStream =
new CNcbiIfstream(seqEntryFile.c_str(), ios::binary);
474 bDeleteOnClose =
true;
478 cerr <<
"Differ: Unable to open input seq-entry file" <<
"\n";
481 unique_ptr<CObjectIStream> pIs(pI);
487 cerr <<
"Differ: Unable to load input seq-entry from file" <<
"\n";
490 return xCompareSeqEntry(pSeqEntry);
496 const string& accession)
501 pSeqEntry = xLoadSeqEntry(accession);
504 cerr <<
"Loader: Unable to retrieve seq_entry \"" << accession
508 return xCompareSeqEntry(pSeqEntry);
519 cerr <<
"Differ: Unable to load input seq-entry from file." <<
"\n";
522 auto pBioSource = xGetBioSource(pSeqEntry);
524 cerr <<
"Differ: Given sequence does not have a biosource." << endl;
527 auto bioSampleAccessions = xGetBioSampleAccs(pSeqEntry);
528 if (bioSampleAccessions.empty()) {
529 cerr <<
"Differ: Given sequence does not contain biosample links."
535 for (
auto bioSampleAcc: bioSampleAccessions) {
537 bioSampleAcc, *pBioSource, fusedSource, diffs)) {
554 for (
auto descriptor: descriptors) {
555 if (descriptor->IsSource()) {
557 pBioSource->
Assign(descriptor->GetSource());
572 vector<string> bioSampleAccs;
574 for (
auto descriptor: descriptors) {
575 if (!descriptor->IsUser() ||
576 descriptor->GetUser().GetType().GetStr() !=
"DBLink") {
579 auto descriptorData = descriptor->GetUser().GetData();
580 for (
auto entry: descriptorData) {
581 if (!entry->CanGetLabel() || entry->GetLabel().GetStr() !=
"BioSample") {
584 if (!entry->CanGetData()) {
587 auto&
data = entry->GetData();
589 bioSampleAccs.push_back(
data.GetStr());
593 bioSampleAccs.insert(
594 bioSampleAccs.end(),
data.GetStrs().begin(),
data.GetStrs().end());
599 return bioSampleAccs;
604 int main(
int argc,
const char* argv[])
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
vector< CRef< CBiosampleFieldDiff > > TBiosampleFieldDiffList
CRef< CSeq_descr > GetBiosampleData(const string &accession, bool use_dev_server=false, TBioSamples *cache=NULL)
void GenerateDiffListFromBioSource(const CSeq_descr &bioSample, const CBioSource &bioSource, TBiosampleFieldDiffList &diffs)
void PrettyPrint(const TBiosampleFieldDiffList &diffList, CNcbiOstream &ostr, size_t keyWidth=20, size_t valueWidth=40)
void PrintDiffList(const string &source, const TBiosampleFieldDiffList &diffList, CNcbiOstream &ostr)
CRef< CSeq_descr > LoadBioSampleFromAcc(const string &bioSampleAcc)
const char * BSDIFF_APP_VER
CRef< CSeq_descr > LoadBioSampleFromFile(const string &fileName)
void GenerateDiffListFromDescriptors(const CSeq_descr &bioSample, const CSeq_descr &pDescriptorSet, TBiosampleFieldDiffList &diffs)
CRef< CSeq_descr > LoadBioSource(const string &inFile)
int CompareBioSampleAccessionToDescriptors(const string &bioSampleAcc, CRef< CSeq_descr > pDescriptorSet)
int main(int argc, const char *argv[])
int CompareBioSampleAccessionToBioSource(const string &bioSampleAcc, const CBioSource &bioSource)
int CompareBioSampleFileToDescriptors(const string &bioSampleFile, CRef< CSeq_descr > pDescriptorSet)
CRef< CSeq_entry > xLoadSeqEntry(const string &)
virtual int Run(void)
Run the application.
int xCompareSeqEntryFile(const string &)
int xCompareSeqEntry(const CRef< CSeq_entry > &)
CRef< CObjectManager > mpObjmgr
int xCompareSeqEntryAccessionList(const string &)
CRef< CBioSource > xGetBioSource(CRef< CSeq_entry >)
int xCompareSeqEntryAccession(const string &)
vector< string > xGetBioSampleAccs(CRef< CSeq_entry >)
virtual void Init(void)
Initialize the application.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
@Seq_descr.hpp User-defined methods of the data storage class.
const CSeq_descr & GetDescr(void) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
@ eTakeOwnership
An object can take ownership of another.
@ eNoOwnership
No ownership is assumed.
@ eString
An arbitrary string.
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
ESerialDataFormat
Data file format.
@ eSerial_AsnText
ASN.1 text.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
void Read(const CObjectInfo &object)
Read object of known type.
pair< TObjectPtr, TTypeInfo > ObjectInfo(C &obj)
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
void Reset(void)
Reset reference object.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
void Run(void)
Enter the main loop.
void CONNECT_Init(const IRWRegistry *reg=0, CRWLock *lock=0, TConnectInitFlags flag=eConnectInit_OwnNothing, FSSLSetup ssl=0)
Init [X]CONNECT library with the specified "reg" and "lock" (ownership for either or both can be detailed in the "flag" parameter).
const TSource & GetSource(void) const
Get the variant data.
bool IsSource(void) const
Check if variant Source is selected.
const Tdata & Get(void) const
Get the member data.
Lightweight interface for getting lines of data with minimal memory copying.
Magic spell ;-) needed for some weird compilers... very empiric.
const CharType(& source)[N]
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
#define GetArgs
Avoid preprocessor name clash with the NCBI C Toolkit.
Defines command line argument related classes.
Defines unified interface to application:
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
Useful/utility classes and methods.
C++ I/O stream wrappers to compress/decompress data on-the-fly.