/* * Project: MoleCuilder * Description: creates and alters molecular systems * Copyright (C) 2011 University of Bonn. All rights reserved. * Please see the LICENSE file or "Copyright notice" in builder.cpp for details. */ /* * \file FragmentScheduler.cpp * * This file strongly follows the Serialization example from the boost::asio * library (see server.cpp) * * Created on: Oct 19, 2011 * Author: heber */ // include config.h #ifdef HAVE_CONFIG_H #include #endif // boost asio needs specific operator new #include #include "CodePatterns/MemDebug.hpp" #include #include #include #include #include "Connection.hpp" // Must come before boost/serialization headers. #include #include "CodePatterns/Info.hpp" #include "CodePatterns/Log.hpp" #include "Controller/Commands/EnrollInPoolOperation.hpp" #include "Jobs/MPQCCommandJob.hpp" #include "Jobs/SystemCommandJob.hpp" #include "JobId.hpp" #include "FragmentScheduler.hpp" FragmentJob::ptr FragmentScheduler::WorkerListener_t::NoJob(new SystemCommandJob(std::string("/bin/true"), std::string("dosomething"), JobId::NoJob)); /** Helper function to enforce binding of FragmentWorker to possible derived * FragmentJob classes. */ void dummyInit() { SystemCommandJob("/bin/false", "something", JobId::IllegalJob); MPQCCommandJob("nofile", JobId::IllegalJob); } /** Constructor of class FragmentScheduler. * * We setup both acceptors to accept connections from workers and Controller. * * \param io_service io_service of the asynchronous communications * \param workerport port to listen for worker connections * \param controllerport port to listen for controller connections. 
*/ FragmentScheduler::FragmentScheduler(boost::asio::io_service& io_service, unsigned short workerport, unsigned short controllerport) : WorkerListener(io_service, workerport, JobsQueue, pool, boost::bind(&FragmentScheduler::sendJobToWorker, boost::ref(*this), _1, _2)), ControllerListener(io_service, controllerport, JobsQueue, boost::bind(&Listener::initiateSocket, boost::ref(WorkerListener))), connection(io_service), sendJobOp(connection) { Info info(__FUNCTION__); // listen for controller ControllerListener.initiateSocket(); // only initiate socket if jobs are already present if (JobsQueue.isJobPresent()) { WorkerListener.initiateSocket(); } } /** Handle a new worker connection. * * We store the given address in the pool. * * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::WorkerListener_t::handle_Accept(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); if (!e) { // Successfully accepted a new connection. // read address conn->async_read(address, boost::bind(&FragmentScheduler::WorkerListener_t::handle_ReadAddress, this, boost::asio::placeholders::error, conn)); } else { // An error occurred. Log it and return. Since we are not starting a new // accept operation the io_service will run out of work to do and the // server will exit. Exitflag = ErrorFlag; ELOG(0, e.message()); } } /** Handle having received Worker's address * * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::WorkerListener_t::handle_ReadAddress(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); if (!e) { // Successfully accepted a new connection. // read address conn->async_read(choice, boost::bind(&FragmentScheduler::WorkerListener_t::handle_ReadChoice, this, boost::asio::placeholders::error, conn)); } else { // An error occurred. Log it and return. 
Since we are not starting a new // accept operation the io_service will run out of work to do and the // server will exit. Exitflag = ErrorFlag; ELOG(0, e.message()); } } /** Controller callback function to read the choice for next operation. * * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::WorkerListener_t::handle_ReadChoice(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); if (!e) { LOG(1, "INFO: Received request for operation " << choice << "."); // switch over the desired choice read previously switch(choice) { case NoWorkerOperation: { ELOG(1, "WorkerListener_t::handle_ReadChoice() - called with NoOperation."); break; } case EnrollInPool: { if (pool.presentInPool(address)) { ELOG(1, "INFO: worker "+toString(address)+" is already contained in pool."); enum EnrollInPoolOperation::EnrollFlag flag = EnrollInPoolOperation::Fail; conn->async_write(flag, boost::bind(&FragmentScheduler::WorkerListener_t::handle_enrolled, this, boost::asio::placeholders::error, conn)); } else { // insert as its new worker LOG(1, "INFO: Adding " << address << " to pool ..."); pool.addWorker(address); enum EnrollInPoolOperation::EnrollFlag flag = EnrollInPoolOperation::Success; conn->async_write(flag, boost::bind(&FragmentScheduler::WorkerListener_t::handle_enrolled, this, boost::asio::placeholders::error, conn)); break; } case SendResult: { if (pool.presentInPool(address)) { // check whether its priority is busy_priority if (pool.isWorkerBusy(address)) { conn->async_read(result, boost::bind(&FragmentScheduler::WorkerListener_t::handle_ReceiveResultFromWorker, this, boost::asio::placeholders::error, conn)); } else { ELOG(1, "Worker " << address << " trying to send result who is not marked as busy."); conn->async_read(result, boost::bind(&FragmentScheduler::WorkerListener_t::handle_RejectResultFromWorker, this, boost::asio::placeholders::error, conn)); } } else { ELOG(1, "Worker " << 
address << " trying to send result who is not in pool."); conn->async_read(result, boost::bind(&FragmentScheduler::WorkerListener_t::handle_RejectResultFromWorker, this, boost::asio::placeholders::error, conn)); } break; } case RemoveFromPool: { if (pool.presentInPool(address)) { // removing present worker pool.removeWorker(address); } else { ELOG(1, "Shutting down Worker " << address << " not contained in pool."); } break; } default: Exitflag = ErrorFlag; ELOG(1, "WorkerListener_t::handle_ReadChoice() - called with no valid choice."); break; } } // restore NoOperation choice such that choice is not read twice choice = NoWorkerOperation; } else { // An error occurred. Log it and return. Since we are not starting a new // accept operation the io_service will run out of work to do and the // server will exit. Exitflag = ErrorFlag; ELOG(0, e.message()); } if (JobsQueue.isJobPresent()) { // Start an accept operation for a new Connection only when there // are still jobs present initiateSocket(); } } /** Callback function when new worker has enrolled. * * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::WorkerListener_t::handle_enrolled(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); if (!e) { FragmentJob::ptr job; if (JobsQueue.isJobPresent()) { job = JobsQueue.popJob(); } else { job = NoJob; } callback_sendJobToWorker(pool.getNextIdleWorker(), job); } else { // An error occurred. Log it and return. Since we are not starting a new // accept operation the io_service will run out of work to do and the // server will exit. Exitflag = ErrorFlag; ELOG(0, e.message()); } } /** Callback function when result has been received. 
* * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::WorkerListener_t::handle_ReceiveResultFromWorker(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); LOG(1, "INFO: Received result for job #" << result->getId() << " ..."); // and push into queue ASSERT(result->getId() != (JobId_t)JobId::NoJob, "WorkerListener_t::handle_ReceiveResultFromWorker() - result received has NoJob id."); ASSERT(result->getId() != (JobId_t)JobId::IllegalJob, "WorkerListener_t::handle_ReceiveResultFromWorker() - result received has IllegalJob id."); // place id into expected if ((result->getId() != (JobId_t)JobId::NoJob) && (result->getId() != (JobId_t)JobId::IllegalJob)) JobsQueue.pushResult(result); // mark as idle pool.unmarkWorkerBusy(address); // for now remove worker again from pool such that other may connect pool.removeWorker(address); // erase result result.reset(); LOG(1, "INFO: JobsQueue has " << JobsQueue.getDoneJobs() << " results."); } /** Callback function when result has been received. * * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::WorkerListener_t::handle_RejectResultFromWorker(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); // nothing to do LOG(1, "INFO: Rejecting result for job #" << result->getId() << ", placing back into queue."); JobsQueue.resubmitJob(result->getId()); LOG(1, "INFO: JobsQueue has " << JobsQueue.getDoneJobs() << " results."); } /** Handle a new controller connection. 
* * \sa handle_ReceiveJobs() * \sa handle_CheckResultState() * \sa handle_SendResults() * * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::ControllerListener_t::handle_Accept(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); if (!e) { conn->async_read(choice, boost::bind(&FragmentScheduler::ControllerListener_t::handle_ReadChoice, this, boost::asio::placeholders::error, conn)); } else { // An error occurred. Log it and return. Since we are not starting a new // accept operation the io_service will run out of work to do and the // server will exit. Exitflag = ErrorFlag; ELOG(0, e.message()); } } /** Controller callback function to read the choice for next operation. * * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::ControllerListener_t::handle_ReadChoice(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); if (!e) { bool LaunchNewAcceptor = true; LOG(1, "INFO: Received request for operation " << choice << "."); // switch over the desired choice read previously switch(choice) { case NoControllerOperation: { ELOG(1, "ControllerListener_t::handle_ReadChoice() - called with NoOperation."); break; } case GetNextJobId: { const JobId_t nextid = globalId.getNextId(); LOG(1, "INFO: Sending next available job id " << nextid << " to controller ..."); conn->async_write(nextid, boost::bind(&FragmentScheduler::ControllerListener_t::handle_GetNextJobIdState, this, boost::asio::placeholders::error, conn)); break; } case ReceiveJobs: { // The connection::async_write() function will automatically // serialize the data structure for us. 
LOG(1, "INFO: Receiving bunch of jobs from a controller ..."); conn->async_read(jobs, boost::bind(&FragmentScheduler::ControllerListener_t::handle_ReceiveJobs, this, boost::asio::placeholders::error, conn)); break; } case CheckState: { // first update number jobInfo[0] = JobsQueue.getPresentJobs(); jobInfo[1] = JobsQueue.getDoneJobs(); // now we accept connections to check for state of calculations LOG(1, "INFO: Sending state that "+toString(jobInfo[0])+" jobs are present and "+toString(jobInfo[1])+" jobs are done to controller ..."); conn->async_write(jobInfo, boost::bind(&FragmentScheduler::ControllerListener_t::handle_CheckResultState, this, boost::asio::placeholders::error, conn)); break; } case SendResults: { const std::vector results = JobsQueue.getAllResults(); // ... or we give the results LOG(1, "INFO: Sending "+toString(results.size())+" results to controller ..."); conn->async_write(results, boost::bind(&FragmentScheduler::ControllerListener_t::handle_SendResults, this, boost::asio::placeholders::error, conn)); break; } case ShutdownControllerSocket: { LOG(1, "INFO: Received shutdown from controller ..."); // only allow for shutdown when there are no more jobs in the queue if (!JobsQueue.isJobPresent()) { LaunchNewAcceptor = false; } else { ELOG(2, "There are still jobs waiting in the queue."); } break; } default: Exitflag = ErrorFlag; ELOG(1, "ControllerListener_t::handle_ReadChoice() - called with no valid choice."); break; } // restore NoControllerOperation choice such that choice is not read twice choice = NoControllerOperation; if (LaunchNewAcceptor) { LOG(1, "Launching new acceptor on socket."); // Start an accept operation for a new Connection. initiateSocket(); } } else { // An error occurred. Log it and return. Since we are not starting a new // accept operation the io_service will run out of work to do and the // server will exit. Exitflag = ErrorFlag; ELOG(0, e.message()); } } /** Controller callback function when job has been sent. 
* * We check here whether the worker socket is accepting, if there * have been no jobs we re-activate it, as it is shut down after * last job. * * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::ControllerListener_t::handle_ReceiveJobs(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); bool need_initiateSocket = !JobsQueue.isJobPresent(); // jobs are received, hence place in JobsQueue if (!jobs.empty()) { LOG(1, "INFO: Pushing " << jobs.size() << " jobs into queue."); JobsQueue.pushJobs(jobs); } jobs.clear(); // initiate socket if we had no jobs before if (need_initiateSocket) initiateWorkerSocket(); } /** Controller callback function when checking on state of results. * * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::ControllerListener_t::handle_CheckResultState(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); // do nothing LOG(1, "INFO: Sent that " << jobInfo << " jobs are (scheduled, done)."); } /** Controller callback function when checking on state of results. * * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::ControllerListener_t::handle_GetNextJobIdState(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); // do nothing LOG(1, "INFO: Sent next available job id."); } /** Controller callback function when result has been received. * * \param e error code if something went wrong * \param conn reference with the connection */ void FragmentScheduler::ControllerListener_t::handle_SendResults(const boost::system::error_code& e, connection_ptr conn) { Info info(__FUNCTION__); // do nothing LOG(1, "INFO: Results have been sent."); } /** Helper function to send a job to worker. * * Note that we do not set the worker as busy. We simply send it the job. 
*
* The worker is, however, expected to be marked as busy in the pool already;
* this is asserted before anything is sent.
*
* @param address address of worker
* @param job job to send
*/
void FragmentScheduler::sendJobToWorker(const WorkerAddress &address, FragmentJob::ptr &job)
{
  // precondition: the pool already lists this worker as busy
  ASSERT( pool.isWorkerBusy(address),
      "FragmentScheduler::sendJobToWorker() - Worker "+toString(address)+" is not marked as busy.");
  LOG(1, "INFO: Sending job " << job->getId() << " to worker " << address << ".");
  // hand the job to the send operation, then invoke it with the worker's
  // host and service
  sendJobOp.setJob(job);
  sendJobOp(address.host, address.service);
}

// NOTE: the two helpers below are disabled (commented out).
///** Helper function to shutdown a single worker.
// *
// * We send NoJob to indicate shutdown
// *
// * @param address of worker to shutdown
// */
//void FragmentScheduler::shutdownWorker(const WorkerAddress &address)
//{
//  sendJobToWorker(address, NoJob);
//}
//
///** Sends shutdown to all current workers in the pool.
// *
// */
//void FragmentScheduler::removeAllWorkers()
//{
//  // give all workers shutdown signal
//  while (pool.presentIdleWorkers()) {
//    const WorkerAddress address = pool.getNextIdleWorker();
//    shutdownWorker(address);
//  }
//}