51 virtual void Init(
void);
52 virtual int Run(
void);
60 argDescr->AddDefaultKey(
"gc",
"gcContent",
"calibrated organism %GC "
61 "content (ie. human: 50, rat: 48)",
63 argDescr->AddDefaultKey(
"cpg",
"obsexp",
64 "observed / expected CpG percentage",
66 argDescr->AddDefaultKey(
"win",
"window_size",
67 "width of sliding window",
69 argDescr->AddDefaultKey(
"len",
"min_length",
70 "minimum length of an island",
72 argDescr->AddOptionalKey(
"m",
"merge_isles",
73 "merge adjacent islands within the specified "
74 "distance of each other",
79 argDescr->AddOptionalKey(
"a",
"accession",
81 argDescr->AddOptionalKey(
"i",
"infile",
87 argDescr->SetUsageContext(
GetArguments().GetProgramBasename(),
88 "Scans sequences for CpG islands; uses algorithm based upon Takai & Jones, 2002. Output sent to stdout.\n",
false);
97 unsigned int len =
i.m_Stop -
i.m_Start + 1;
98 o <<
i.m_Start <<
"\t" <<
i.m_Stop <<
"\t" <<
99 (double) (
i.m_C +
i.m_G) /
len <<
"\t" <<
100 (double)
i.m_CG *
len / (
i.m_C *
i.m_G);
115 cerr <<
"Invalid seq-id: '" << acc <<
"': " << e.
what() << endl;
121 if (!bioseq_handle) {
122 cerr <<
"Bioseq load FAILED." << endl;
129 seqString.reserve(sv.
size());
132 CCpGIslands cpgIsles(seqString.data(), seqString.length(),
133 args[
"win"].AsInteger(), args[
"len"].AsInteger(),
134 args[
"gc"].AsInteger(), args[
"cpg"].AsInteger());
140 cout << acc <<
"\t" << *
i << endl;
160 if (localID[0] !=
'>') {
166 string seqString, lineBuff;
168 getline(
infile, lineBuff);
169 if (seqString.size() + lineBuff.size() > seqString.capacity())
170 seqString.reserve(seqString.capacity() * 2);
171 seqString += lineBuff;
175 CCpGIslands cpgIsles(seqString.data(), seqString.length(),
176 args[
"win"].AsInteger(), args[
"len"].AsInteger(),
177 args[
"gc"].AsInteger(), args[
"cpg"].AsInteger());
184 cout << localID <<
"\t" << *
i << endl;
208 cout.setf(ios::fixed, ios::floatfield);
209 cout <<
"# CpG islands. Win:" << args[
"win"].AsInteger()
210 <<
"; Min len:" << args[
"len"].AsInteger() <<
"; Min %GC:"
211 << args[
"gc"].AsDouble() <<
"; Min obs/exp CpG: "
212 << args[
"cpg"].AsDouble();
214 cout <<
"; Merge islands within: " << args[
"m"].AsInteger();
217 cout <<
"# label\tisle_start\tisle_stop\t%GC\tobs/exp CpG" << endl;
220 retCode =
ScanForCpGs(args[
"a"].AsString(), scope, args);
224 istream &
infile = args[
"i"].AsInputFile();
229 cerr <<
"Processing " << acc << endl;
247 int main(
int argc,
char** argv)
virtual int Run(void)
Run the application.
virtual void Init(void)
Initialize the application.
list< SCpGIsland > TIsles
void MergeIslesWithin(unsigned int range)
const TIsles & GetIsles(void) const
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
int ScanForCpGs(const string &acc, CScope &scope, const CArgs &args)
int main(int argc, char **argv)
CNcbiOstream & operator<<(CNcbiOstream &o, SCpGIsland i)
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
void DisableArgDescriptions(TDisableArgDesc disable=fDisableStdArgs)
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
size_t GetNExtra(void) const
Get the number of unnamed positional (a.k.a. extra) args.
@ eInputFile
Name of file (must exist and be readable)
@ eString
An arbitrary string.
@ eInteger
Convertible into an integer number (int or Int8)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
void Critical(CExceptionArgs_Base &args)
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void Reset(void)
Reset reference object.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application: