83 const string& output_extension,
84 const bool binary)
const;
109 bool process_set_of_any_type =
false)
const;
121 arg_desc->SetUsageContext(
123 "Split a single large instance of Seq-submit into smaller instances",
127 arg_desc->AddKey(
"i",
"InputFile",
128 "Filename for asn.1 input",
133 arg_desc->AddDefaultKey(
"b",
135 "Input asn.1 file in binary mode [T/F]",
144 string description =
"Filename stub for asn.1 outputs.\n";
145 description.append(
"Will append consecutive numbers and a file-type extension to this stub");
147 arg_desc->AddKey(
"o",
"OutputFile",
153 arg_desc->AddDefaultKey(
"s",
155 "Output asn.1 files in binary mode [T/F]",
162 arg_desc->AddDefaultKey(
"w",
164 "Wrap output Seq-entries within Seq-submits with Genbank set [T/F]",
171 arg_desc->AddAlias(
"l",
"logfile");
176 arg_desc->AddDefaultKey(
"n",
178 "Number of records in output Seq-submits",
182 string description =
"Generate output in sorted order \n";
183 description.append(
" 0 - unsorted (in order of appearance in input file;\n");
184 description.append(
" 1 - by sequence length from longest to shortest;\n");
185 description.append(
" 2 - by sequence length from shortest to longest;\n");
186 description.append(
" 3 - by contig/scaffold id.");
188 arg_desc->AddDefaultKey(
"r",
194 arg_desc->SetConstraint(
"r",
196 "0",
"1",
"2",
"3"));
201 arg_desc->AddFlag(
"e",
"Treat input as Seq-entry");
211 if (args[
"e"].AsBoolean() && args[
"w"].AsBoolean()) {
212 string err_msg =
"Incompatible parameters: do not use -w when -e is used";
219 string err_msg =
"Could not read input file";
225 bool input_as_seq_entry = args[
"e"].AsBoolean();
226 list<CRef<CSerialObject>> output_array;
231 string err_msg =
"Could not process input file";
236 const string output_stub = args[
"o"].AsString();
240 string output_extension =
"";
245 int output_index = 0;
246 unique_ptr<CObjectOStream> ostr;
247 bool binary = args[
"s"].AsBoolean();
252 for (
auto& it: output_array) {
264 string err_msg =
"Error while output results. ";
276 string extension =
"";
279 if (
arr.size() > 1) {
280 extension =
arr.back();
288 const string& output_stub,
291 const string& output_extension,
292 const bool binary)
const
294 if (output_stub.empty()) {
297 "Output stub not specified");
305 if (padded_index.size() < pad_width) {
306 padded_index =
padding.substr(0, pad_width - padded_index.size()) + padded_index;
310 string filename = output_stub +
"_" + padded_index +
"." + output_extension;
321 "Unable to open output file:" + filename);
329 if (
GetArgs()[
"e"].AsBoolean()) {
341 unique_ptr<CObjectIStream> istr;
360 template<
class Derived>
369 return static_cast<const Derived*
>(
this)->
compare_seq(b1, b2);
448 "Bioseq id not set");
457 "Cannot access bioseq id");
463 "Inconsistent bioseq ids");
476 TSeqEntryArray::iterator seq_entry_it = seq_entry_array.begin();
477 while (seq_entry_it != seq_entry_array.end()) {
483 if (seq_entry_it == seq_entry_array.end()) {
487 wrapped_entry_array.push_back(seq_entry);
497 TSeqPos bundle_size = args[
"n"].AsInteger();
499 bool wrap_entries = args[
"w"].AsBoolean();
516 stable_sort(seq_entry_array.begin(), seq_entry_array.end(),
SIdCompare());
523 for(
size_t i=0;
i<wrapped_entry_array.size(); ++
i) {
526 output_array.push_back(seqsub);
529 TSeqEntryArray::iterator seq_entry_it = seq_entry_array.begin();
530 while (seq_entry_it != seq_entry_array.end()) {
536 if (seq_entry_it == seq_entry_array.end()) {
540 output_array.push_back(seqsub);
565 return sub.
SetData().SetEntrys();
576 if (input_sub ==
nullptr || !input_sub->
IsEntrys()) {
577 ERR_POST(
"Seq-submit does not contain any entries");
620 if (input_entry ==
nullptr || !input_entry->
IsSet()) {
621 ERR_POST(
"Seq-entry does not contain any entries");
637 input_entry->
SetDescr().Assign(*upper_level_descr);
638 upper_level_descr.
Reset();
651 "Input file unspecified");
655 if (args[
"b"].AsBoolean()) {
659 string infile_str = args[
"i"].AsString();
662 if (pInputStream->fail())
666 "Could not create input stream for \"" + infile_str +
"\"");
677 "Unable to open input file \"" + infile_str +
"\"");
699 return find(begin(MULTIPLE_ALLOWED), end(MULTIPLE_ALLOWED), choice) != end(MULTIPLE_ALLOWED);
710 for (
auto& dst_descr: dst.
Get()) {
711 if (dst_descr->Which() == choice) {
713 if (dst_descr->Equals(descr)) {
729 for (
auto& descr: src.
Get())
735 dst.
Set().push_back(descr);
740 if (find_if(dst.
Set().begin(), dst.
Set().end(), [choice](
const CRef<CSeqdesc>& cur_descr) { return cur_descr->Which() == choice; } ) == dst.
Set().end()) {
741 dst.
Set().push_back(descr);
764 bool set_of_allowed_type = set_of_any_type_allowed;
765 if (!set_of_any_type_allowed) {
770 return set_of_allowed_type;
776 bool process_set_of_any_type)
const
788 ?
"Genbank set" :
"Pub-set";
790 string err_msg =
"Wrapper " + class_string +
"has non-empty annotation.";
797 new_descr.
Set().push_back(*it);
811 if (seq_descr.
IsSet()) {
820 seq_entry_array.push_back(new_entry);
831 int main(
int argc,
const char** argv)
User-defined methods of the data storage class.
const CBioseq & GetNucFromNucProtSet(void) const
const CSeq_id * GetLocalId() const
Find a local ID if present.
@Seq_descr.hpp User-defined methods of the data storage class.
const CSeq_descr & GetDescr(void) const
void SetDescr(CSeq_descr &value)
bool IsSetDescr(void) const
bool IsEntrys(void) const
Base class for all serializable objects.
virtual CRef< CSerialObject > BuildObject() const =0
virtual TSeqEntryList & GetListOfEntries(CSerialObject &obj) const =0
virtual CRef< CSerialObject > BuildObject() const
virtual TSeqEntryList & GetListOfEntries(CSerialObject &obj) const
CSeqEntryHelper(CSeq_entry &seq_entry)
void xFlattenSeqEntrys(CSeq_submit::TData::TEntrys &entries, TSeqEntryArray &seq_entry_array) const
bool xTryReadInputFile(CRef< CSerialObject > &obj) const
string xGetFileExtension(const string &filename) const
bool xTryProcessSeqSubmit(CRef< CSerialObject > &obj, list< CRef< CSerialObject >> &output_array) const
void xFlattenSeqEntry(CSeq_entry &seq_entry, const CSeq_descr &seq_descr, TSeqEntryArray &seq_entry_array, bool process_set_of_any_type=false) const
bool xTryProcessSeqEntries(const CObjectHelper &builder, TSeqEntryArray &seq_entry_array, list< CRef< CSerialObject >> &output_array) const
CObjectOStream * xInitOutputStream(const string &output_stub, const TSeqPos output_index, const TSeqPos pad_width, const string &output_extension, const bool binary) const
void xMergeSeqDescr(const CSeq_descr &src, CSeq_descr &dst) const
CRef< CSerialObject > xGetInputObject() const
void Init()
Initialize the application.
void xWrapSeqEntries(TSeqEntryArray &seq_entry_array, const TSeqPos &bundle_size, TSeqEntryArray &wrapped_entry_array) const
int Run()
Run the application.
CObjectIStream * xInitInputStream() const
bool xTryProcessSeqEntry(CRef< CSerialObject > &obj, list< CRef< CSerialObject >> &output_array) const
CSeq_submit & m_seq_submit
virtual CRef< CSerialObject > BuildObject() const
virtual TSeqEntryList & GetListOfEntries(CSerialObject &obj) const
CSeqSubmitHelper(CSeq_submit &seq_submit)
Include a standard set of the NCBI C++ Toolkit most basic headers.
static vector< string > arr
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eTakeOwnership
An object can take ownership of another.
@ eInputFile
Name of file (must exist and be readable)
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
@ eDS_ToStderr
To standard error stream.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
ESerialDataFormat
Data file format.
@ eSerial_AsnText
ASN.1 text.
@ eSerial_AsnBinary
ASN.1 binary.
int CompareOrdered(const CSeq_id &sid2) const
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
bool IsGeneral(void) const
Check if variant General is selected.
const TSeq & GetSeq(void) const
Get the variant data.
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
TSet & SetSet(void)
Select the variant.
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
bool IsSetDescr(void) const
Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if variant Set is selected.
void SetClass(TClass value)
Assign a value to Class data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_genbank
converted genbank
@ eClass_pub_set
all the seqs from a single publication
list< CRef< CSeqdesc > > Tdata
const TInst & GetInst(void) const
Get the Inst member data.
const TId & GetId(void) const
Get the Id member data.
const Tdata & Get(void) const
Get the member data.
bool IsSetInst(void) const
The sequence data. Check if a value has been assigned to Inst data member.
TLength GetLength(void) const
Get the Length member data.
list< CRef< CSeq_id > > TId
bool IsSetLength(void) const
Length of sequence in residues. Check if a value has been assigned to Length data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetId(void) const
Equivalent identifiers. Check if a value has been assigned to Id data member.
Tdata & Set(void)
Assign a value to data member.
@ e_Het
cofactor, etc associated but not bound
@ e_Num
a numbering system
@ e_User
user defined object
@ e_Pub
a reference to the publication
@ e_Dbxref
xref to other databases
@ e_Comment
a more extensive comment
@ e_Modelev
model evidence for XM records
@ e_Region
overall region (globin locus)
@ e_Maploc
map location of this sequence
@ e_Name
a name for this sequence
list< CRef< CSeq_entry > > TEntrys
void SetSub(TSub &value)
Assign a value to Sub data member.
void SetData(TData &value)
Assign a value to Data data member.
static bool MultipleAllowed(CSeqdesc::E_Choice choice)
static bool NeedToInclude(const CSeqdesc &descr, const CSeq_descr &dst)
vector< CRef< CSeq_entry > > TSeqEntryArray
static bool NeedToProcess(const CSeq_entry &entry, bool set_of_any_type_allowed)
list< CRef< CSeq_entry > > TSeqEntryList
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines NCBI C++ diagnostic APIs, classes, and macros.
Defines NCBI C++ exception handling.
std::istream & in(std::istream &in_, double &x_)
int main(int argc, const char **argv)
bool operator()(const CRef< CSeq_entry > &e1, const CRef< CSeq_entry > &e2) const
bool compare_seq(const CBioseq &b1, const CBioseq &b2) const
bool compare_seq(const CBioseq &b1, const CBioseq &b2) const
CConstRef< CSeq_id > xGetGeneralId(const CBioseq &bioseq) const
CConstRef< CSeq_id > xGetId(const CBioseq &bioseq) const
bool compare_seq(const CBioseq &b1, const CBioseq &b2) const
bool compare_seq(const CBioseq &b1, const CBioseq &b2) const
static wxAcceleratorEntry entries[3]