42 #define NCBI_USE_ERRCODE_X ConnServ_WorkerNode
51 : m_JobsStarted(0), m_JobsSucceeded(0), m_JobsFailed(0), m_JobsReturned(0),
52 m_JobsRescheduled(0), m_JobsCanceled(0), m_JobsLost(0),
53 m_MaxJobsAllowed(0), m_MaxFailuresAllowed(0),
74 LOG_POST_X(1,
"The maximum number of allowed jobs (" <<
76 "Sending the shutdown request." );
115 if (event !=
eJobStarted && !grid_globals.IsShuttingDown()) {
118 if (total_memory_limit > 0) {
121 ERR_POST(
"Could not check self memory usage" );
122 }
else if (memory_usage.
total > total_memory_limit) {
124 ") is above the configured limit (" <<
125 total_memory_limit <<
")");
126 const auto kExitCode = 100;
147 os << it->first->GetJobKey() <<
" \"" <<
149 "\" -- running for " <<
150 (
int) it->second.elasped_time.Elapsed() <<
" seconds.";
151 if (it->second.is_stuck)
152 os <<
"!!! LONG RUNNING JOB !!!";
163 if (!it->second.is_stuck) {
165 const auto job_key = it->first->GetJobKey();
166 ERR_POST_X(3,
"An infinite loop is detected in job " << job_key);
169 it->second.is_stuck =
true;
177 ERR_POST_X(4,
"All jobs are in infinite loops. "
178 "Server is shutting down.");
189 if (!it->second.is_stuck)
192 job.
error_msg =
"Job execution time exceeded " +
194 unsigned(it->second.elasped_time.Elapsed()));
207 m_ReuseJobObject(
false),
225 return global_instance.
Get();
unsigned int GetNewJobNumber()
CAtomicCounter_WithAutoInit m_JobsStarted
CWNJobWatcher & GetJobWatcher()
void InterruptUDPPortListening()
SGridWorkerNodeImpl * m_Worker
unique_ptr< CWNJobWatcher > m_JobWatcher
const CTime & GetStartTime() const
static CGridGlobals & GetInstance()
@ eNormalShutdown
Normal shutdown was requested.
@ eShutdownImmediate
Urgent shutdown was requested.
T & Get(void)
Create the variable if not created yet, return the reference.
virtual void Notify(const CWorkerNodeJobContext &job, EEvent event)
void CheckForInfiniteLoop()
void x_KillNode(CGridWorkerNode)
unsigned int m_JobsStarted
unsigned int m_MaxFailuresAllowed
unsigned int m_InfiniteLoopTime
unsigned int m_JobsFailed
unsigned int m_JobsRescheduled
unsigned int m_JobsSucceeded
void Print(CNcbiOstream &os) const
unsigned int m_JobsCanceled
unsigned int m_JobsReturned
unsigned int m_MaxJobsAllowed
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
TValue Add(int delta) THROWS_NONE
Atomically add value (=delta), and return new counter value.
CDiagContext_Extra & Print(const string &name, const string &value)
The method does not print the argument, but adds it to the string.
#define LOG_POST_X(err_subcode, message)
CDiagContext & GetDiagContext(void)
Get diag context instance.
CDiagContext_Extra Extra(void) const
Create a temporary CDiagContext_Extra object.
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Warning(CExceptionArgs_Base &args)
CGridWorkerNode GetWorkerNode() const
void ReturnJob(const CNetScheduleJob &job)
Switch the job back to the "Pending" status so that it can be run again on a different worker node.
CNetScheduleExecutor GetNSExecutor() const
void PutFailure(const CNetScheduleJob &job, bool no_retries=false)
Submit job failure diagnostics.
Uint8 GetTotalMemoryLimit() const
Get total memory limit (automatic restart if node grows more than that)
uint64_t Uint8
8-byte (64-bit) unsigned integer
static TPid GetPid(void)
Get process identifier (pid) for the current process.
size_t total
Total memory usage.
bool Kill(unsigned long timeout=kDefaultKillTimeout)
Terminate process.
static bool GetMemoryUsage(SMemoryUsage &usage)
Get current process memory usage.
pid_t TPid
Process identifier (PID) and process handle.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
EIO_Status Send(const void *data, size_t datalen, const string &host=string(), unsigned short port=0)
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static string PrintableString(const CTempString str, TPrintableMode mode=fNewLine_Quote|fNonAscii_Passthru)
Get a printable version of the specified string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
CTime GetFastLocalTime(void)
Quick and dirty getter of local time.
unsigned int
A callback function used to compare two keys in a database.
Definition of all error codes used in connect services library (xconnserv.lib and others).
Static variables safety - create on demand, destroy on application termination.
Defines NCBI C++ diagnostic APIs, classes, and macros.
Multi-threading – mutexes; rw-locks; semaphore.
Process memory usage information, in bytes.