/* * Project: MoleCuilder * Description: creates and alters molecular systems * Copyright (C) 2010-2012 University of Bonn. All rights reserved. * * * This file is part of MoleCuilder. * * MoleCuilder is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * MoleCuilder is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with MoleCuilder. If not, see . */ /* * CyclicStructureAnalysis.cpp * * Created on: Feb 16, 2011 * Author: heber */ // include config.h #ifdef HAVE_CONFIG_H #include #endif #include "CodePatterns/MemDebug.hpp" #include "CyclicStructureAnalysis.hpp" #include #include "Atom/atom.hpp" #include "Bond/bond.hpp" #include "CodePatterns/Assert.hpp" #include "CodePatterns/Info.hpp" #include "CodePatterns/Log.hpp" #include "CodePatterns/Verbose.hpp" #include "Element/element.hpp" #include "molecule.hpp" CyclicStructureAnalysis::CyclicStructureAnalysis(const enum HydrogenTreatment _treatment) : treatment(_treatment) {} CyclicStructureAnalysis::~CyclicStructureAnalysis() {} /** Initialise vertex as white with no predecessor, no shortest path(-1), color white. * \param atom_id id of atom whose node we address */ void CyclicStructureAnalysis::InitNode(atomId_t atom_id) { ShortestPathList[atom_id] = -1; PredecessorList[atom_id] = 0; ColorList[atom_id] = GraphEdge::white; } void CyclicStructureAnalysis::Reset() { // clear what's present ShortestPathList.clear(); PredecessorList.clear(); ColorList.clear(); BFSStack.clear(); TouchedStack.clear(); } /** Clean the accounting structure for all nodes touched so far. */ void CyclicStructureAnalysis::CleanAllTouched() { atom *Walker = NULL; while (!TouchedStack.empty()) { Walker = TouchedStack.front(); TouchedStack.pop_front(); PredecessorList[Walker->getNr()] = NULL; ShortestPathList[Walker->getNr()] = -1; ColorList[Walker->getNr()] = GraphEdge::white; } } /** Resets shortest path list and BFSStack. * \param *&Walker current node, pushed onto BFSStack and TouchedStack */ void CyclicStructureAnalysis::InitializeToRoot(atom *&Root) { ColorList.clear(); ShortestPathList.clear(); ShortestPathList[Root->getNr()] = 0; PredecessorList.clear(); BFSStack.clear(); // start with empty BFS stack BFSStack.push_front(Root); TouchedStack.push_front(Root); } /** Performs a BFS from \a *Root, trying to find the same node and hence all cycles. * * We exclude the back edge, hence the direct way is prohibited. But as it is a back edge, * there must be at least one more way to \a *Root. This leads us to all cycles for this * back edge. * * \param OtherAtom pointing to Root on return indicating found cycle * \param *&BackEdge the edge from root that we don't want to move along * \param MinRingSize set to minimum over all cycles encountered */ void CyclicStructureAnalysis::findAllCyclesforBackEdge( atom *&OtherAtom, bond::ptr &BackEdge, int &MinRingSize) { size_t MaximumHorizon = (size_t)-1; // will overflow to largest number atom *Walker = NULL; do { // look for Root ASSERT(!BFSStack.empty(), "CyclicStructureAnalysis_CyclicBFSFromRootToRoot() - BFSStack is empty!"); Walker = BFSStack.back(); BFSStack.pop_back(); LOG(2, "INFO: Current Walker is " << *Walker << ", we look for SP to Root " << *Root << "."); // check all edges/bonds from current Walker if (MaximumHorizon >= (size_t)ShortestPathList[Walker->getNr()]) { const BondList& ListOfBonds = Walker->getListOfBonds(); for (BondList::const_iterator Runner = ListOfBonds.begin(); Runner != ListOfBonds.end(); ++Runner) { if ((*Runner) != BackEdge) { // only walk along DFS spanning tree (otherwise we always find SP of one being backedge Binder) OtherAtom = (*Runner)->GetOtherAtom(Walker); if ((treatment == IncludeHydrogen) || (OtherAtom->getType()->getAtomicNumber() != 1)) { LOG(2, "INFO: Current OtherAtom is: " << OtherAtom->getName() << " for bond " << *(*Runner) << "."); if (ColorList[OtherAtom->getNr()] == GraphEdge::white) { // we discovered a new node/atom TouchedStack.push_front(OtherAtom); ColorList[OtherAtom->getNr()] = GraphEdge::lightgray; PredecessorList[OtherAtom->getNr()] = Walker; // Walker is the predecessor ShortestPathList[OtherAtom->getNr()] = ShortestPathList[Walker->getNr()] + 1; LOG(2, "INFO: Coloring OtherAtom " << OtherAtom->getName() << " lightgray, its predecessor is " << Walker->getName() << " and its Shortest Path is " << ShortestPathList[OtherAtom->getNr()] << " egde(s) long."); //if (ShortestPathList[OtherAtom->getNr()] < MinimumRingSize[Walker->GetTrueFather()->getNr()]) { // Check for maximum distance LOG(3, "ACCEPT: Putting OtherAtom " << OtherAtom->getName() << " into queue."); BFSStack.push_front(OtherAtom); //} } else { LOG(3, "REJECT: Not Adding, has already been visited."); } if (OtherAtom == Root) break; } else { LOG(2, "INFO: Skipping hydrogen atom " << *OtherAtom << "."); ColorList[OtherAtom->getNr()] = GraphEdge::black; } } else { LOG(2, "REJECT: Bond " << *(*Runner) << " not Visiting, is the back edge."); } } ColorList[Walker->getNr()] = GraphEdge::black; LOG(1, "INFO: Coloring Walker " << Walker->getName() << " " << GraphEdge::getColorName(ColorList[Walker->getNr()]) << "."); } else { LOG(1, "INFO: Skipping bonds for " << Walker->getName() << " as it resides on the horizon."); } // have we closed the cycle, i.e. stumbled upon Root by another mean than // the back edge? if (OtherAtom == Root) { // check whether this cycle wasn't already found beforehand by stepping // through predecessor list int RingSize = RetrieveCycleMembers(OtherAtom, BackEdge, MinRingSize); MaximumHorizon = std::min( MaximumHorizon, (size_t)RingSize ); LOG(2, "INFO: Maximum horizon is set to " << MaximumHorizon); // remove last step and prepare for a possible yet another path to Root do { ASSERT(!TouchedStack.empty(), "CyclicStructureAnalysis_CyclicBFSFromRootToRoot() - TouchedStack is empty!"); OtherAtom = TouchedStack.front(); TouchedStack.pop_front(); if (PredecessorList[OtherAtom->getNr()] == Walker) { LOG(4, "INFO: Removing " << *OtherAtom << " from lists and stacks."); PredecessorList[OtherAtom->getNr()] = NULL; ShortestPathList[OtherAtom->getNr()] = -1; ColorList[OtherAtom->getNr()] = GraphEdge::white; // rats ... deque has no find() std::deque::iterator iter = find( BFSStack.begin(), BFSStack.end(), OtherAtom); ASSERT(iter != BFSStack.end(), "CyclicStructureAnalysis_CyclicBFSFromRootToRoot() - can't find "+toString(*OtherAtom)+" on stack!"); BFSStack.erase(iter); } } while ((!TouchedStack.empty()) && (PredecessorList[OtherAtom->getNr()] == NULL)); TouchedStack.push_front(OtherAtom); // last was wrongly popped OtherAtom = BackEdge->rightatom; // set to not Root } } while ((!BFSStack.empty()) && (OtherAtom != Root) && (OtherAtom != NULL)); // || (ShortestPathList[OtherAtom->getNr()] < MinimumRingSize[Walker->GetTrueFather()->getNr()]))); } /** Extracts cycle from BFSAccounting::PredecessorList with given \a BackEdge and current \a Root. * * \param BackEdge back edge of this cycle */ CyclicStructureAnalysis::cycle_t CyclicStructureAnalysis::extractCurrentCycle( bond::ptr &BackEdge) { CyclicStructureAnalysis::cycle_t currentcycle; atom *Walker = Root; currentcycle.insert(Walker->GetTrueFather()->getId()); std::stringstream output; while (Walker != BackEdge->rightatom) { // leftatom is root Walker = PredecessorList[Walker->getNr()]; Walker->GetTrueFather()->IsCyclic = true; output << Walker->getName() << " <-> "; #ifndef NDEBUG std::pair< cycle_t::iterator, bool > inserter = #endif currentcycle.insert(Walker->GetTrueFather()->getId()); ASSERT( inserter.second, "CyclicStructureAnalysis::RetrieveCycleMembers() - we already inserted " +toString(Walker->GetTrueFather()->getId())+" into currentcycle."); } LOG(3, "INFO: " << output.str() << Root->getName()); return currentcycle; } /** Climb back the BFSAccounting::PredecessorList and find cycle members. * \param *&OtherAtom * \param *&BackEdge denotes the edge we did not want to travel along when doing CyclicBFSFromRootToRoot() * \param &BFS accounting structure * \param &MinRingSize global minimum distance from one node without encountering oneself, set on return * \return size of found cycle, -1 - nothing found, something went wrong */ int CyclicStructureAnalysis::RetrieveCycleMembers( atom *&OtherAtom, bond::ptr &BackEdge, int &MinRingSize) { int RingSize = -1; bool newcycle = false; if (OtherAtom == Root) { // now climb back the predecessor list and thus find the cycle members Root->GetTrueFather()->IsCyclic = true; { // check whether cycle is present already atom *Walker = Root; LOG(4, "DEBUG: Checking whether all predecessors are already marked cyclic ..."); while (Walker != BackEdge->rightatom) { // Note that leftatom is Root itself if (!Walker->GetTrueFather()->IsCyclic) { // if one bond in the loop is not marked as cyclic, we haven't found this cycle yet LOG(4, "\tDEBUG: Walker " << *Walker << " is not cyclic, breaking."); break; } else Walker = PredecessorList[Walker->getNr()]; } LOG(4, "DEBUG: Checking done."); // if each atom in found cycle is cyclic, loop's been found before already // exctract cycle { const cycle_t currentcycle = extractCurrentCycle(BackEdge); if (Walker != BackEdge->rightatom) { // loop got round completely allcycles.push_back(currentcycle); newcycle = true; } else { LOG(3, "INFO: All bonds are marked cyclic already, checking allcycles whether cycle is already present."); const cycles_t::const_iterator iter = std::find(allcycles.begin(), allcycles.end(), currentcycle); if (iter == allcycles.end()) { // not found allcycles.push_back(currentcycle); newcycle = true; } } RingSize = currentcycle.size(); if (newcycle) { LOG(0, "INFO: " << "Found ring contains: " << currentcycle << " with a length of " << RingSize); } else { LOG(1, "INFO: cycle " << currentcycle << " is already present."); } } } if (newcycle) { // walk through all and set MinimumRingSize atom *Walker = Root; if ((MinimumRingSize.count(Walker->GetTrueFather()->getNr()) == 0) || (RingSize < MinimumRingSize[Walker->GetTrueFather()->getNr()])) { MinimumRingSize[Walker->GetTrueFather()->getNr()] = RingSize; } else { LOG(3, "INFO: Not setting MinimumRingSize of "<< *(Walker->GetTrueFather()) << " to " << RingSize << " which is already set to " << MinimumRingSize[Walker->GetTrueFather()->getNr()] << "."); } while (Walker != BackEdge->rightatom) { // note that Root is leftatom Walker = PredecessorList[Walker->getNr()]; if ((MinimumRingSize.count(Walker->GetTrueFather()->getNr()) == 0) || (RingSize < MinimumRingSize[Walker->GetTrueFather()->getNr()])) MinimumRingSize[Walker->GetTrueFather()->getNr()] = RingSize; } if ((RingSize < MinRingSize) || (MinRingSize == -1)) MinRingSize = RingSize; } } else { LOG(1, "INFO: No ring containing " << *Root << " with length equal to or smaller than " << MinimumRingSize[Root->GetTrueFather()->getNr()] << " found."); } return RingSize; } /** From a given node performs a BFS to touch the next cycle, for whose nodes \a MinimumRingSize is set and set it accordingly. * \param *&Walker node to look for closest cycle from, i.e. \a MinimumRingSize is set for this node * \param AtomCount number of nodes in graph */ void CyclicStructureAnalysis::BFSToNextCycle(atom *Walker) { atom *Root = Walker; atom *OtherAtom = Walker; Reset(); InitializeToRoot(Walker); while (OtherAtom != NULL) { // look for Root ASSERT(!BFSStack.empty(), "CyclicStructureAnalysis_BFSToNextCycle() - BFSStack is empty!"); Walker = BFSStack.front(); BFSStack.pop_front(); LOG(2, "INFO: Current Walker is " << *Walker << ", BFS-stepping away from Root " << *Root << "."); const BondList& ListOfBonds = Walker->getListOfBonds(); for (BondList::const_iterator Runner = ListOfBonds.begin(); Runner != ListOfBonds.end(); ++Runner) { // "removed (*Runner) != BackEdge) || " from next if, is u // only walk along DFS spanning tree (otherwise we always find SP of 1 // being backedge Binder), but terminal hydrogens may be connected via // backedge, hence extra check // if ((ListOfBonds.size() != 1)) { OtherAtom = (*Runner)->GetOtherAtom(Walker); if ((treatment == IncludeHydrogen) || (OtherAtom->getType()->getAtomicNumber() != 1)) { LOG(2, "INFO: Current OtherAtom is: " << OtherAtom->getName() << " for bond " << *(*Runner) << "."); if (ColorList[OtherAtom->getNr()] == GraphEdge::white) { TouchedStack.push_front(OtherAtom); ColorList[OtherAtom->getNr()] = GraphEdge::lightgray; PredecessorList[OtherAtom->getNr()] = Walker; // Walker is the predecessor ShortestPathList[OtherAtom->getNr()] = ShortestPathList[Walker->getNr()] + 1; LOG(2, "ACCEPT: Coloring OtherAtom " << OtherAtom->getName() << " lightgray, its predecessor is " << Walker->getName() << " and its Shortest Path is " << ShortestPathList[OtherAtom->getNr()] << " egde(s) long."); // distance is a locally optimal criterion (we have eliminated all // cycles already). Hence, we may assume that all set MinimumRingSize // correspond to shortest distances to cycles. I.e., as soon as we reach // as set MinimumRingSize we may use it and the current shortest path // distance to it if (MinimumRingSize.count(OtherAtom->GetTrueFather()->getNr())) { LOG(2, "SUCCESS: Found set MinimumRingSize at " << *OtherAtom << ", walking back to Root " << *Root << "."); // set all predecessors const unsigned int shorttestpath = ShortestPathList[OtherAtom->getNr()]; atom *Backwalker = OtherAtom; while (Backwalker != Root) { Backwalker = PredecessorList[Backwalker->getNr()]; MinimumRingSize[Backwalker->GetTrueFather()->getNr()] = (shorttestpath - ShortestPathList[Backwalker->getNr()]) + MinimumRingSize[OtherAtom->GetTrueFather()->getNr()]; LOG(2, "Setting MinimumRingSize of " << *Backwalker << " to " << MinimumRingSize[Backwalker->GetTrueFather()->getNr()] << "."); } OtherAtom = NULL; //break; break; } else BFSStack.push_front(OtherAtom); } else { LOG(3, "REJECT: Not Adding, has already been visited."); } } else { LOG(3, "REJECT: Not Visiting, is a back edge to hydrogen."); } // } } ColorList[Walker->getNr()] = GraphEdge::black; LOG(1, "INFO: Coloring Walker " << Walker->getName() << " " << GraphEdge::getColorName(ColorList[Walker->getNr()]) << "."); } } /** All nodes that are not in cycles get assigned a \a *&MinimumRingSize by BFS to next cycle. * \param *&MinimumRingSize array with minimum distance without encountering oneself for each atom * \param MinRingSize global minium distance */ void CyclicStructureAnalysis::AssignRingSizetoNonCycleMembers(const int MinRingSize) { atom *Walker = NULL; if (MinRingSize != -1) { // if rings are present // go over all atoms World::AtomComposite allatoms = World::getInstance().getAllAtoms(); for (World::AtomComposite::const_iterator iter = allatoms.begin(); iter != allatoms.end(); ++iter) { Walker = *iter; if (MinimumRingSize.find(Walker->GetTrueFather()->getNr()) == MinimumRingSize.end()) { // check whether MinimumRingSize is set, if not BFS to next where it is LOG(1, "---------------------------------------------------------------------------------------------------------"); BFSToNextCycle(Walker); } ASSERT(MinimumRingSize.find(Walker->GetTrueFather()->getNr()) != MinimumRingSize.end(), "CyclicStructureAnalysis::AssignRingSizetoNonCycleMembers() - BFSToNextCycle did not set MinimumRingSize of " +toString(*(Walker->GetTrueFather()))+"."); LOG(1, "INFO: Minimum ring size of " << *Walker << " is " << MinimumRingSize[Walker->GetTrueFather()->getNr()] << "."); } LOG(1, "INFO: Minimum ring size is " << MinRingSize << ", over " << allcycles.size() << " cycle(s) total."); } else LOG(1, "INFO: No rings were detected in the molecular structure."); } /** Analyses the cycles found and returns minimum of all cycle lengths. * We begin with a list of Back edges found during DepthFirstSearchAnalysis(). We go through this list - one end is the Root, * the other our initial Walker - and do a Breadth First Search for the Root. We mark down each Predecessor and as soon as * we have found the Root via BFS, we may climb back the closed cycle via the Predecessors. Thereby we mark atoms and bonds * as cyclic and print out the cycles. * \param *BackEdgeStack stack with all back edges found during DFS scan. Beware: This stack contains the bonds from the total molecule, not from the subgraph! * \todo BFS from the not-same-LP to find back to starting point of tributary cycle over more than one bond */ void CyclicStructureAnalysis::operator()(std::deque * BackEdgeStack) { Info FunctionInfo("CyclicStructureAnalysis"); atom *Walker = NULL; atom *OtherAtom = NULL; bond::ptr BackEdge; int MinRingSize = -1; // clear cycle container allcycles.clear(); { std::stringstream output; output << "Back edge list - "; for (std::deque::const_iterator iter = BackEdgeStack->begin(); iter != BackEdgeStack->end(); ++iter) output << **iter << " "; LOG(0, output.str()); } LOG(1, "STATUS: Analysing cycles ... "); while (!BackEdgeStack->empty()) { BackEdge = BackEdgeStack->front(); BackEdgeStack->pop_front(); // this is the target Root = BackEdge->leftatom; // this is the source point Walker = BackEdge->rightatom; InitializeToRoot(Walker); LOG(1, "---------------------------------------------------------------------------------------------------------"); OtherAtom = NULL; // find all cycles for current BackEdge findAllCyclesforBackEdge(OtherAtom, BackEdge, MinRingSize); CleanAllTouched(); } AssignRingSizetoNonCycleMembers(MinRingSize); } /** Output a list of flags, stating whether the bond was visited or not. * \param *list list to print */ void CyclicStructureAnalysis::OutputAlreadyVisited(int *list) { std::stringstream output; output << "Already Visited Bonds:\t"; for (int i = 1; i <= list[0]; i++) output << list[i] << " "; LOG(0, output.str()); } const std::map& CyclicStructureAnalysis::getMinimumRingSize() const { return MinimumRingSize; }