source: molecuilder/src/molecule_fragmentation.cpp@ 31ccb6

Last change on this file since 31ccb6 was 31ccb6, checked in by Frederik Heber <heber@…>, 15 years ago

molecule::StoreAdjacencyToFile() and molecule::StoreBondsToFile() now take additional filename.

  • since ParseCommandLineOptions() has cases j and J which use the above functions, we have to generalize these functions to work also without a given path and with arbritrary filename.

Signed-off-by: Frederik Heber <heber@…>

  • Property mode set to 100644
File size: 76.5 KB
Line 
1/*
2 * molecule_fragmentation.cpp
3 *
4 * Created on: Oct 5, 2009
5 * Author: heber
6 */
7
8#include <cstring>
9
10#include "atom.hpp"
11#include "bond.hpp"
12#include "config.hpp"
13#include "element.hpp"
14#include "helpers.hpp"
15#include "lists.hpp"
16#include "log.hpp"
17#include "memoryallocator.hpp"
18#include "molecule.hpp"
19#include "periodentafel.hpp"
20
21/************************************* Functions for class molecule *********************************/
22
23
24/** Estimates by educated guessing (using upper limit) the expected number of fragments.
25 * The upper limit is
26 * \f[
27 * n = N \cdot C^k
28 * \f]
29 * where \f$C=2^c\f$ and c is the maximum bond degree over N number of atoms.
30 * \param *out output stream for debugging
31 * \param order bond order k
32 * \return number n of fragments
33 */
34int molecule::GuesstimateFragmentCount(int order)
35{
36 size_t c = 0;
37 int FragmentCount;
38 // get maximum bond degree
39 atom *Walker = start;
40 while (Walker->next != end) {
41 Walker = Walker->next;
42 c = (Walker->ListOfBonds.size() > c) ? Walker->ListOfBonds.size() : c;
43 }
44 FragmentCount = NoNonHydrogen*(1 << (c*order));
45 Log() << Verbose(1) << "Upper limit for this subgraph is " << FragmentCount << " for " << NoNonHydrogen << " non-H atoms with maximum bond degree of " << c << "." << endl;
46 return FragmentCount;
47};
48
49/** Scans a single line for number and puts them into \a KeySet.
50 * \param *out output stream for debugging
51 * \param *buffer buffer to scan
52 * \param &CurrentSet filled KeySet on return
53 * \return true - at least one valid atom id parsed, false - CurrentSet is empty
54 */
55bool ScanBufferIntoKeySet(char *buffer, KeySet &CurrentSet)
56{
57 stringstream line;
58 int AtomNr;
59 int status = 0;
60
61 line.str(buffer);
62 while (!line.eof()) {
63 line >> AtomNr;
64 if (AtomNr >= 0) {
65 CurrentSet.insert(AtomNr); // insert at end, hence in same order as in file!
66 status++;
67 } // else it's "-1" or else and thus must not be added
68 }
69 Log() << Verbose(1) << "The scanned KeySet is ";
70 for(KeySet::iterator runner = CurrentSet.begin(); runner != CurrentSet.end(); runner++) {
71 Log() << Verbose(0) << (*runner) << "\t";
72 }
73 Log() << Verbose(0) << endl;
74 return (status != 0);
75};
76
77/** Parses the KeySet file and fills \a *FragmentList from the known molecule structure.
78 * Does two-pass scanning:
79 * -# Scans the keyset file and initialises a temporary graph
80 * -# Scans TEFactors file and sets the TEFactor of each key set in the temporary graph accordingly
81 * Finally, the temporary graph is inserted into the given \a FragmentList for return.
82 * \param *out output stream for debugging
83 * \param *path path to file
84 * \param *FragmentList empty, filled on return
85 * \return true - parsing successfully, false - failure on parsing (FragmentList will be NULL)
86 */
87bool ParseKeySetFile(char *path, Graph *&FragmentList)
88{
89 bool status = true;
90 ifstream InputFile;
91 stringstream line;
92 GraphTestPair testGraphInsert;
93 int NumberOfFragments = 0;
94 char *filename = Malloc<char>(MAXSTRINGSIZE, "molecule::ParseKeySetFile - filename");
95
96 if (FragmentList == NULL) { // check list pointer
97 FragmentList = new Graph;
98 }
99
100 // 1st pass: open file and read
101 Log() << Verbose(1) << "Parsing the KeySet file ... " << endl;
102 sprintf(filename, "%s/%s%s", path, FRAGMENTPREFIX, KEYSETFILE);
103 InputFile.open(filename);
104 if (InputFile != NULL) {
105 // each line represents a new fragment
106 char *buffer = Malloc<char>(MAXSTRINGSIZE, "molecule::ParseKeySetFile - *buffer");
107 // 1. parse keysets and insert into temp. graph
108 while (!InputFile.eof()) {
109 InputFile.getline(buffer, MAXSTRINGSIZE);
110 KeySet CurrentSet;
111 if ((strlen(buffer) > 0) && (ScanBufferIntoKeySet(buffer, CurrentSet))) { // if at least one valid atom was added, write config
112 testGraphInsert = FragmentList->insert(GraphPair (CurrentSet,pair<int,double>(NumberOfFragments++,1))); // store fragment number and current factor
113 if (!testGraphInsert.second) {
114 eLog() << Verbose(0) << "KeySet file must be corrupt as there are two equal key sets therein!" << endl;
115 performCriticalExit();
116 }
117 }
118 }
119 // 2. Free and done
120 InputFile.close();
121 InputFile.clear();
122 Free(&buffer);
123 Log() << Verbose(1) << "done." << endl;
124 } else {
125 Log() << Verbose(1) << "File " << filename << " not found." << endl;
126 status = false;
127 }
128
129 Free(&filename);
130 return status;
131};
132
133/** Parses the TE factors file and fills \a *FragmentList from the known molecule structure.
134 * -# Scans TEFactors file and sets the TEFactor of each key set in the temporary graph accordingly
135 * \param *out output stream for debugging
136 * \param *path path to file
137 * \param *FragmentList graph whose nodes's TE factors are set on return
138 * \return true - parsing successfully, false - failure on parsing
139 */
140bool ParseTEFactorsFile(char *path, Graph *FragmentList)
141{
142 bool status = true;
143 ifstream InputFile;
144 stringstream line;
145 GraphTestPair testGraphInsert;
146 int NumberOfFragments = 0;
147 double TEFactor;
148 char *filename = Malloc<char>(MAXSTRINGSIZE, "molecule::ParseTEFactorsFile - filename");
149
150 if (FragmentList == NULL) { // check list pointer
151 FragmentList = new Graph;
152 }
153
154 // 2nd pass: open TEFactors file and read
155 Log() << Verbose(1) << "Parsing the TEFactors file ... " << endl;
156 sprintf(filename, "%s/%s%s", path, FRAGMENTPREFIX, TEFACTORSFILE);
157 InputFile.open(filename);
158 if (InputFile != NULL) {
159 // 3. add found TEFactors to each keyset
160 NumberOfFragments = 0;
161 for(Graph::iterator runner = FragmentList->begin();runner != FragmentList->end(); runner++) {
162 if (!InputFile.eof()) {
163 InputFile >> TEFactor;
164 (*runner).second.second = TEFactor;
165 Log() << Verbose(2) << "Setting " << ++NumberOfFragments << " fragment's TEFactor to " << (*runner).second.second << "." << endl;
166 } else {
167 status = false;
168 break;
169 }
170 }
171 // 4. Free and done
172 InputFile.close();
173 Log() << Verbose(1) << "done." << endl;
174 } else {
175 Log() << Verbose(1) << "File " << filename << " not found." << endl;
176 status = false;
177 }
178
179 // free memory
180 Free(&filename);
181
182 return status;
183};
184
185/** Stores key sets to file.
186 * \param *out output stream for debugging
187 * \param KeySetList Graph with Keysets
188 * \param *path path to file
189 * \return true - file written successfully, false - writing failed
190 */
191bool StoreKeySetFile(Graph &KeySetList, char *path)
192{
193 ofstream output;
194 bool status = true;
195 string line;
196
197 // open KeySet file
198 line = path;
199 line.append("/");
200 line += FRAGMENTPREFIX;
201 line += KEYSETFILE;
202 output.open(line.c_str(), ios::out);
203 Log() << Verbose(1) << "Saving key sets of the total graph ... ";
204 if(output != NULL) {
205 for(Graph::iterator runner = KeySetList.begin(); runner != KeySetList.end(); runner++) {
206 for (KeySet::iterator sprinter = (*runner).first.begin();sprinter != (*runner).first.end(); sprinter++) {
207 if (sprinter != (*runner).first.begin())
208 output << "\t";
209 output << *sprinter;
210 }
211 output << endl;
212 }
213 Log() << Verbose(0) << "done." << endl;
214 } else {
215 eLog() << Verbose(0) << "Unable to open " << line << " for writing keysets!" << endl;
216 performCriticalExit();
217 status = false;
218 }
219 output.close();
220 output.clear();
221
222 return status;
223};
224
225
226/** Stores TEFactors to file.
227 * \param *out output stream for debugging
228 * \param KeySetList Graph with factors
229 * \param *path path to file
230 * \return true - file written successfully, false - writing failed
231 */
232bool StoreTEFactorsFile(Graph &KeySetList, char *path)
233{
234 ofstream output;
235 bool status = true;
236 string line;
237
238 // open TEFactors file
239 line = path;
240 line.append("/");
241 line += FRAGMENTPREFIX;
242 line += TEFACTORSFILE;
243 output.open(line.c_str(), ios::out);
244 Log() << Verbose(1) << "Saving TEFactors of the total graph ... ";
245 if(output != NULL) {
246 for(Graph::iterator runner = KeySetList.begin(); runner != KeySetList.end(); runner++)
247 output << (*runner).second.second << endl;
248 Log() << Verbose(1) << "done." << endl;
249 } else {
250 Log() << Verbose(1) << "failed to open " << line << "." << endl;
251 status = false;
252 }
253 output.close();
254
255 return status;
256};
257
258/** For a given graph, sorts KeySets into a (index, keyset) map.
259 * \param *GlobalKeySetList list of keysets with global ids (valid in "this" molecule) needed for adaptive increase
260 * \return map from index to keyset
261 */
262map<int,KeySet> * GraphToIndexedKeySet(Graph *GlobalKeySetList)
263{
264 map<int,KeySet> *IndexKeySetList = new map<int,KeySet>;
265 for(Graph::iterator runner = GlobalKeySetList->begin(); runner != GlobalKeySetList->end(); runner++) {
266 IndexKeySetList->insert( pair<int,KeySet>(runner->second.first,runner->first) );
267 }
268 return IndexKeySetList;
269};
270
271/** Inserts a (\a No, \a value) pair into the list, overwriting present one.
272 * Note if values are equal, No will decided on which is first
273 * \param *out output stream for debugging
274 * \param &AdaptiveCriteriaList list to insert into
275 * \param &IndexedKeySetList list to find key set for a given index \a No
276 * \param FragOrder current bond order of fragment
277 * \param No index of keyset
278 * \param value energy value
279 */
280void InsertIntoAdaptiveCriteriaList(map<int, pair<double,int> > *AdaptiveCriteriaList, map<int,KeySet> &IndexKeySetList, int FragOrder, int No, double Value)
281{
282 map<int,KeySet>::iterator marker = IndexKeySetList.find(No); // find keyset to Frag No.
283 if (marker != IndexKeySetList.end()) { // if found
284 Value *= 1 + MYEPSILON*(*((*marker).second.begin())); // in case of equal energies this makes them not equal without changing anything actually
285 // as the smallest number in each set has always been the root (we use global id to keep the doubles away), seek smallest and insert into AtomMask
286 pair <map<int, pair<double,int> >::iterator, bool> InsertedElement = AdaptiveCriteriaList->insert( make_pair(*((*marker).second.begin()), pair<double,int>( fabs(Value), FragOrder) ));
287 map<int, pair<double,int> >::iterator PresentItem = InsertedElement.first;
288 if (!InsertedElement.second) { // this root is already present
289 if ((*PresentItem).second.second < FragOrder) // if order there is lower, update entry with higher-order term
290 //if ((*PresentItem).second.first < (*runner).first) // as higher-order terms are not always better, we skip this part (which would always include this site into adaptive increase)
291 { // if value is smaller, update value and order
292 (*PresentItem).second.first = fabs(Value);
293 (*PresentItem).second.second = FragOrder;
294 Log() << Verbose(2) << "Updated element (" << (*PresentItem).first << ",[" << (*PresentItem).second.first << "," << (*PresentItem).second.second << "])." << endl;
295 } else {
296 Log() << Verbose(2) << "Did not update element " << (*PresentItem).first << " as " << FragOrder << " is less than or equal to " << (*PresentItem).second.second << "." << endl;
297 }
298 } else {
299 Log() << Verbose(2) << "Inserted element (" << (*PresentItem).first << ",[" << (*PresentItem).second.first << "," << (*PresentItem).second.second << "])." << endl;
300 }
301 } else {
302 Log() << Verbose(1) << "No Fragment under No. " << No << "found." << endl;
303 }
304};
305
306/** Scans the adaptive order file and insert (index, value) into map.
307 * \param *out output stream for debugging
308 * \param *path path to ENERGYPERFRAGMENT file (may be NULL if Order is non-negative)
309 * \param &IndexedKeySetList list to find key set for a given index \a No
310 * \return adaptive criteria list from file
311 */
312map<int, pair<double,int> > * ScanAdaptiveFileIntoMap(char *path, map<int,KeySet> &IndexKeySetList)
313{
314 map<int, pair<double,int> > *AdaptiveCriteriaList = new map<int, pair<double,int> >;
315 int No = 0, FragOrder = 0;
316 double Value = 0.;
317 char *buffer = Malloc<char>(MAXSTRINGSIZE, "molecule::CheckOrderAtSite: *buffer");
318 sprintf(buffer, "%s/%s%s.dat", path, FRAGMENTPREFIX, ENERGYPERFRAGMENT);
319 ifstream InputFile(buffer, ios::in);
320
321 if (CountLinesinFile(InputFile) > 0) {
322 // each line represents a fragment root (Atom::nr) id and its energy contribution
323 InputFile.getline(buffer, MAXSTRINGSIZE); // skip comment lines
324 InputFile.getline(buffer, MAXSTRINGSIZE);
325 while(!InputFile.eof()) {
326 InputFile.getline(buffer, MAXSTRINGSIZE);
327 if (strlen(buffer) > 2) {
328 //Log() << Verbose(2) << "Scanning: " << buffer << endl;
329 stringstream line(buffer);
330 line >> FragOrder;
331 line >> ws >> No;
332 line >> ws >> Value; // skip time entry
333 line >> ws >> Value;
334 No -= 1; // indices start at 1 in file, not 0
335 //Log() << Verbose(2) << " - yields (" << No << "," << Value << ", " << FragOrder << ")" << endl;
336
337 // clean the list of those entries that have been superceded by higher order terms already
338 InsertIntoAdaptiveCriteriaList(AdaptiveCriteriaList, IndexKeySetList, FragOrder, No, Value);
339 }
340 }
341 // close and done
342 InputFile.close();
343 InputFile.clear();
344 }
345 Free(&buffer);
346
347 return AdaptiveCriteriaList;
348};
349
350/** Maps adaptive criteria list back onto (Value, (Root Nr., Order))
351 * (i.e. sorted by value to pick the highest ones)
352 * \param *out output stream for debugging
353 * \param &AdaptiveCriteriaList list to insert into
354 * \param *mol molecule with atoms
355 * \return remapped list
356 */
357map<double, pair<int,int> > * ReMapAdaptiveCriteriaListToValue(map<int, pair<double,int> > *AdaptiveCriteriaList, molecule *mol)
358{
359 atom *Walker = mol->start;
360 map<double, pair<int,int> > *FinalRootCandidates = new map<double, pair<int,int> > ;
361 Log() << Verbose(1) << "Root candidate list is: " << endl;
362 for(map<int, pair<double,int> >::iterator runner = AdaptiveCriteriaList->begin(); runner != AdaptiveCriteriaList->end(); runner++) {
363 Walker = mol->FindAtom((*runner).first);
364 if (Walker != NULL) {
365 //if ((*runner).second.second >= Walker->AdaptiveOrder) { // only insert if this is an "active" root site for the current order
366 if (!Walker->MaxOrder) {
367 Log() << Verbose(2) << "(" << (*runner).first << ",[" << (*runner).second.first << "," << (*runner).second.second << "])" << endl;
368 FinalRootCandidates->insert( make_pair( (*runner).second.first, pair<int,int>((*runner).first, (*runner).second.second) ) );
369 } else {
370 Log() << Verbose(2) << "Excluding (" << *Walker << ", " << (*runner).first << ",[" << (*runner).second.first << "," << (*runner).second.second << "]), as it has reached its maximum order." << endl;
371 }
372 } else {
373 eLog() << Verbose(0) << "Atom No. " << (*runner).second.first << " was not found in this molecule." << endl;
374 performCriticalExit();
375 }
376 }
377 return FinalRootCandidates;
378};
379
380/** Marks all candidate sites for update if below adaptive threshold.
381 * Picks a given number of highest values and set *AtomMask to true.
382 * \param *out output stream for debugging
383 * \param *AtomMask defines true/false per global Atom::nr to mask in/out each nuclear site, used to activate given number of site to increment order adaptively
384 * \param FinalRootCandidates list candidates to check
385 * \param Order desired order
386 * \param *mol molecule with atoms
387 * \return true - if update is necessary, false - not
388 */
389bool MarkUpdateCandidates(bool *AtomMask, map<double, pair<int,int> > &FinalRootCandidates, int Order, molecule *mol)
390{
391 atom *Walker = mol->start;
392 int No = -1;
393 bool status = false;
394 for(map<double, pair<int,int> >::iterator runner = FinalRootCandidates.upper_bound(pow(10.,Order)); runner != FinalRootCandidates.end(); runner++) {
395 No = (*runner).second.first;
396 Walker = mol->FindAtom(No);
397 //if (Walker->AdaptiveOrder < MinimumRingSize[Walker->nr]) {
398 Log() << Verbose(2) << "Root " << No << " is still above threshold (10^{" << Order <<"}: " << runner->first << ", setting entry " << No << " of Atom mask to true." << endl;
399 AtomMask[No] = true;
400 status = true;
401 //} else
402 //Log() << Verbose(2) << "Root " << No << " is still above threshold (10^{" << Order <<"}: " << runner->first << ", however MinimumRingSize of " << MinimumRingSize[Walker->nr] << " does not allow further adaptive increase." << endl;
403 }
404 return status;
405};
406
407/** print atom mask for debugging.
408 * \param *out output stream for debugging
409 * \param *AtomMask defines true/false per global Atom::nr to mask in/out each nuclear site, used to activate given number of site to increment order adaptively
410 * \param AtomCount number of entries in \a *AtomMask
411 */
412void PrintAtomMask(bool *AtomMask, int AtomCount)
413{
414 Log() << Verbose(2) << " ";
415 for(int i=0;i<AtomCount;i++)
416 Log() << Verbose(0) << (i % 10);
417 Log() << Verbose(0) << endl;
418 Log() << Verbose(2) << "Atom mask is: ";
419 for(int i=0;i<AtomCount;i++)
420 Log() << Verbose(0) << (AtomMask[i] ? "t" : "f");
421 Log() << Verbose(0) << endl;
422};
423
424/** Checks whether the OrderAtSite is still below \a Order at some site.
425 * \param *out output stream for debugging
426 * \param *AtomMask defines true/false per global Atom::nr to mask in/out each nuclear site, used to activate given number of site to increment order adaptively
427 * \param *GlobalKeySetList list of keysets with global ids (valid in "this" molecule) needed for adaptive increase
428 * \param Order desired Order if positive, desired exponent in threshold criteria if negative (0 is single-step)
429 * \param *MinimumRingSize array of max. possible order to avoid loops
430 * \param *path path to ENERGYPERFRAGMENT file (may be NULL if Order is non-negative)
431 * \return true - needs further fragmentation, false - does not need fragmentation
432 */
433bool molecule::CheckOrderAtSite(bool *AtomMask, Graph *GlobalKeySetList, int Order, int *MinimumRingSize, char *path)
434{
435 atom *Walker = start;
436 bool status = false;
437
438 // initialize mask list
439 for(int i=AtomCount;i--;)
440 AtomMask[i] = false;
441
442 if (Order < 0) { // adaptive increase of BondOrder per site
443 if (AtomMask[AtomCount] == true) // break after one step
444 return false;
445
446 // transmorph graph keyset list into indexed KeySetList
447 if (GlobalKeySetList == NULL) {
448 eLog() << Verbose(1) << "Given global key set list (graph) is NULL!" << endl;
449 return false;
450 }
451 map<int,KeySet> *IndexKeySetList = GraphToIndexedKeySet(GlobalKeySetList);
452
453 // parse the EnergyPerFragment file
454 map<int, pair<double,int> > *AdaptiveCriteriaList = ScanAdaptiveFileIntoMap(path, *IndexKeySetList); // (Root No., (Value, Order)) !
455 if (AdaptiveCriteriaList->empty()) {
456 eLog() << Verbose(2) << "Unable to parse file, incrementing all." << endl;
457 while (Walker->next != end) {
458 Walker = Walker->next;
459 #ifdef ADDHYDROGEN
460 if (Walker->type->Z != 1) // skip hydrogen
461 #endif
462 {
463 AtomMask[Walker->nr] = true; // include all (non-hydrogen) atoms
464 status = true;
465 }
466 }
467 }
468 // then map back onto (Value, (Root Nr., Order)) (i.e. sorted by value to pick the highest ones)
469 map<double, pair<int,int> > *FinalRootCandidates = ReMapAdaptiveCriteriaListToValue(AdaptiveCriteriaList, this);
470
471 // pick the ones still below threshold and mark as to be adaptively updated
472 MarkUpdateCandidates(AtomMask, *FinalRootCandidates, Order, this);
473
474 Free(&IndexKeySetList);
475 Free(&AdaptiveCriteriaList);
476 Free(&FinalRootCandidates);
477 } else { // global increase of Bond Order
478 while (Walker->next != end) {
479 Walker = Walker->next;
480 #ifdef ADDHYDROGEN
481 if (Walker->type->Z != 1) // skip hydrogen
482 #endif
483 {
484 AtomMask[Walker->nr] = true; // include all (non-hydrogen) atoms
485 if ((Order != 0) && (Walker->AdaptiveOrder < Order)) // && (Walker->AdaptiveOrder < MinimumRingSize[Walker->nr]))
486 status = true;
487 }
488 }
489 if ((Order == 0) && (AtomMask[AtomCount] == false)) // single stepping, just check
490 status = true;
491
492 if (!status) {
493 if (Order == 0)
494 Log() << Verbose(1) << "Single stepping done." << endl;
495 else
496 Log() << Verbose(1) << "Order at every site is already equal or above desired order " << Order << "." << endl;
497 }
498 }
499
500 PrintAtomMask(AtomMask, AtomCount); // for debugging
501
502 return status;
503};
504
505/** Create a SortIndex to map from atomic labels to the sequence in which the atoms are given in the config file.
506 * \param *out output stream for debugging
507 * \param *&SortIndex Mapping array of size molecule::AtomCount
508 * \return true - success, false - failure of SortIndex alloc
509 */
510bool molecule::CreateMappingLabelsToConfigSequence(int *&SortIndex)
511{
512 if (SortIndex != NULL) {
513 Log() << Verbose(1) << "SortIndex is " << SortIndex << " and not NULL as expected." << endl;
514 return false;
515 }
516 SortIndex = Malloc<int>(AtomCount, "molecule::CreateMappingLabelsToConfigSequence: *SortIndex");
517 for(int i=AtomCount;i--;)
518 SortIndex[i] = -1;
519
520 int AtomNo = 0;
521 SetIndexedArrayForEachAtomTo( SortIndex, &atom::nr, &IncrementalAbsoluteValue, AtomNo );
522
523 return true;
524};
525
526/** Performs a many-body bond order analysis for a given bond order.
527 * -# parses adjacency, keysets and orderatsite files
528 * -# performs DFS to find connected subgraphs (to leave this in was a design decision: might be useful later)
529 * -# RootStack is created for every subgraph (here, later we implement the "update 10 sites with highest energ
530y contribution", and that's why this consciously not done in the following loop)
531 * -# in a loop over all subgraphs
532 * -# calls FragmentBOSSANOVA with this RootStack and within the subgraph molecule structure
533 * -# creates molecule (fragment)s from the returned keysets (StoreFragmentFromKeySet)
534 * -# combines the generated molecule lists from all subgraphs
535 * -# saves to disk: fragment configs, adjacency, orderatsite, keyset files
536 * Note that as we split "this" molecule up into a list of subgraphs, i.e. a MoleculeListClass, we have two sets
537 * of vertex indices: Global always means the index in "this" molecule, whereas local refers to the molecule or
538 * subgraph in the MoleculeListClass.
539 * \param *out output stream for debugging
540 * \param Order up to how many neighbouring bonds a fragment contains in BondOrderScheme::BottumUp scheme
541 * \param *configuration configuration for writing config files for each fragment
542 * \return 1 - continue, 2 - stop (no fragmentation occured)
543 */
544int molecule::FragmentMolecule(int Order, config *configuration)
545{
546 MoleculeListClass *BondFragments = NULL;
547 int *SortIndex = NULL;
548 int *MinimumRingSize = new int[AtomCount];
549 int FragmentCounter;
550 MoleculeLeafClass *MolecularWalker = NULL;
551 MoleculeLeafClass *Subgraphs = NULL; // list of subgraphs from DFS analysis
552 fstream File;
553 bool FragmentationToDo = true;
554 class StackClass<bond *> *BackEdgeStack = NULL, *LocalBackEdgeStack = NULL;
555 bool CheckOrder = false;
556 Graph **FragmentList = NULL;
557 Graph *ParsedFragmentList = NULL;
558 Graph TotalGraph; // graph with all keysets however local numbers
559 int TotalNumberOfKeySets = 0;
560 atom **ListOfAtoms = NULL;
561 atom ***ListOfLocalAtoms = NULL;
562 bool *AtomMask = NULL;
563
564 Log() << Verbose(0) << endl;
565#ifdef ADDHYDROGEN
566 Log() << Verbose(0) << "I will treat hydrogen special and saturate dangling bonds with it." << endl;
567#else
568 Log() << Verbose(0) << "Hydrogen is treated just like the rest of the lot." << endl;
569#endif
570
571 // ++++++++++++++++++++++++++++ INITIAL STUFF: Bond structure analysis, file parsing, ... ++++++++++++++++++++++++++++++++++++++++++
572
573 // ===== 1. Check whether bond structure is same as stored in files ====
574
575 // create lookup table for Atom::nr
576 FragmentationToDo = FragmentationToDo && CreateFatherLookupTable(start, end, ListOfAtoms, AtomCount);
577
578 // === compare it with adjacency file ===
579 FragmentationToDo = FragmentationToDo && CheckAdjacencyFileAgainstMolecule(configuration->configpath, ListOfAtoms);
580 Free(&ListOfAtoms);
581
582 // ===== 2. perform a DFS analysis to gather info on cyclic structure and a list of disconnected subgraphs =====
583 Subgraphs = DepthFirstSearchAnalysis(BackEdgeStack);
584
585 // analysis of the cycles (print rings, get minimum cycle length) for each subgraph
586 for(int i=AtomCount;i--;)
587 MinimumRingSize[i] = AtomCount;
588 MolecularWalker = Subgraphs;
589 FragmentCounter = 0;
590 while (MolecularWalker->next != NULL) {
591 MolecularWalker = MolecularWalker->next;
592 // fill the bond structure of the individually stored subgraphs
593 MolecularWalker->FillBondStructureFromReference(this, FragmentCounter, ListOfLocalAtoms, false); // we want to keep the created ListOfLocalAtoms
594 Log() << Verbose(0) << "Analysing the cycles of subgraph " << MolecularWalker->Leaf << " with nr. " << FragmentCounter << "." << endl;
595 LocalBackEdgeStack = new StackClass<bond *> (MolecularWalker->Leaf->BondCount);
596// // check the list of local atoms for debugging
597// Log() << Verbose(0) << "ListOfLocalAtoms for this subgraph is:" << endl;
598// for (int i=0;i<AtomCount;i++)
599// if (ListOfLocalAtoms[FragmentCounter][i] == NULL)
600// Log() << Verbose(0) << "\tNULL";
601// else
602// Log() << Verbose(0) << "\t" << ListOfLocalAtoms[FragmentCounter][i]->Name;
603 Log() << Verbose(0) << "Gathering local back edges for subgraph " << MolecularWalker->Leaf << " with nr. " << FragmentCounter << "." << endl;
604 MolecularWalker->Leaf->PickLocalBackEdges(ListOfLocalAtoms[FragmentCounter++], BackEdgeStack, LocalBackEdgeStack);
605 Log() << Verbose(0) << "Analysing the cycles of subgraph " << MolecularWalker->Leaf << " with nr. " << FragmentCounter << "." << endl;
606 MolecularWalker->Leaf->CyclicStructureAnalysis(LocalBackEdgeStack, MinimumRingSize);
607 Log() << Verbose(0) << "Done with Analysing the cycles of subgraph " << MolecularWalker->Leaf << " with nr. " << FragmentCounter << "." << endl;
608 delete(LocalBackEdgeStack);
609 }
610 delete(BackEdgeStack);
611
612 // ===== 3. if structure still valid, parse key set file and others =====
613 FragmentationToDo = FragmentationToDo && ParseKeySetFile(configuration->configpath, ParsedFragmentList);
614
615 // ===== 4. check globally whether there's something to do actually (first adaptivity check)
616 FragmentationToDo = FragmentationToDo && ParseOrderAtSiteFromFile(configuration->configpath);
617
618 // =================================== Begin of FRAGMENTATION ===============================
619 // ===== 6a. assign each keyset to its respective subgraph =====
620 Subgraphs->next->AssignKeySetsToFragment(this, ParsedFragmentList, ListOfLocalAtoms, FragmentList, (FragmentCounter = 0), true);
621
622 // ===== 6b. prepare and go into the adaptive (Order<0), single-step (Order==0) or incremental (Order>0) cycle
623 KeyStack *RootStack = new KeyStack[Subgraphs->next->Count()];
624 AtomMask = new bool[AtomCount+1];
625 AtomMask[AtomCount] = false;
626 FragmentationToDo = false; // if CheckOrderAtSite just ones recommends fragmentation, we will save fragments afterwards
627 while ((CheckOrder = CheckOrderAtSite(AtomMask, ParsedFragmentList, Order, MinimumRingSize, configuration->configpath))) {
628 FragmentationToDo = FragmentationToDo || CheckOrder;
629 AtomMask[AtomCount] = true; // last plus one entry is used as marker that we have been through this loop once already in CheckOrderAtSite()
630 // ===== 6b. fill RootStack for each subgraph (second adaptivity check) =====
631 Subgraphs->next->FillRootStackForSubgraphs(RootStack, AtomMask, (FragmentCounter = 0));
632
633 // ===== 7. fill the bond fragment list =====
634 FragmentCounter = 0;
635 MolecularWalker = Subgraphs;
636 while (MolecularWalker->next != NULL) {
637 MolecularWalker = MolecularWalker->next;
638 Log() << Verbose(1) << "Fragmenting subgraph " << MolecularWalker << "." << endl;
639 //MolecularWalker->Leaf->OutputListOfBonds(out); // output atom::ListOfBonds for debugging
640 if (MolecularWalker->Leaf->first->next != MolecularWalker->Leaf->last) {
641 // call BOSSANOVA method
642 Log() << Verbose(0) << endl << " ========== BOND ENERGY of subgraph " << FragmentCounter << " ========================= " << endl;
643 MolecularWalker->Leaf->FragmentBOSSANOVA(FragmentList[FragmentCounter], RootStack[FragmentCounter], MinimumRingSize);
644 } else {
645 eLog() << Verbose(1) << "Subgraph " << MolecularWalker << " has no atoms!" << endl;
646 }
647 FragmentCounter++; // next fragment list
648 }
649 }
650 Log() << Verbose(2) << "CheckOrder is " << CheckOrder << "." << endl;
651 delete[](RootStack);
652 delete[](AtomMask);
653 delete(ParsedFragmentList);
654 delete[](MinimumRingSize);
655
656
657 // ==================================== End of FRAGMENTATION ============================================
658
659 // ===== 8a. translate list into global numbers (i.e. ones that are valid in "this" molecule, not in MolecularWalker->Leaf)
660 Subgraphs->next->TranslateIndicesToGlobalIDs(FragmentList, (FragmentCounter = 0), TotalNumberOfKeySets, TotalGraph);
661
662 // free subgraph memory again
663 FragmentCounter = 0;
664 if (Subgraphs != NULL) {
665 while (Subgraphs->next != NULL) {
666 Subgraphs = Subgraphs->next;
667 delete(FragmentList[FragmentCounter++]);
668 delete(Subgraphs->previous);
669 }
670 delete(Subgraphs);
671 }
672 Free(&FragmentList);
673
674 // ===== 8b. gather keyset lists (graphs) from all subgraphs and transform into MoleculeListClass =====
675 //if (FragmentationToDo) { // we should always store the fragments again as coordination might have changed slightly without changing bond structure
676 // allocate memory for the pointer array and transmorph graphs into full molecular fragments
677 BondFragments = new MoleculeListClass();
678 int k=0;
679 for(Graph::iterator runner = TotalGraph.begin(); runner != TotalGraph.end(); runner++) {
680 KeySet test = (*runner).first;
681 Log() << Verbose(0) << "Fragment No." << (*runner).second.first << " with TEFactor " << (*runner).second.second << "." << endl;
682 BondFragments->insert(StoreFragmentFromKeySet(test, configuration));
683 k++;
684 }
685 Log() << Verbose(0) << k << "/" << BondFragments->ListOfMolecules.size() << " fragments generated from the keysets." << endl;
686
687 // ===== 9. Save fragments' configuration and keyset files et al to disk ===
688 if (BondFragments->ListOfMolecules.size() != 0) {
689 // create the SortIndex from BFS labels to order in the config file
690 CreateMappingLabelsToConfigSequence(SortIndex);
691
692 Log() << Verbose(1) << "Writing " << BondFragments->ListOfMolecules.size() << " possible bond fragmentation configs" << endl;
693 if (BondFragments->OutputConfigForListOfFragments(configuration, SortIndex))
694 Log() << Verbose(1) << "All configs written." << endl;
695 else
696 Log() << Verbose(1) << "Some config writing failed." << endl;
697
698 // store force index reference file
699 BondFragments->StoreForcesFile(configuration->configpath, SortIndex);
700
701 // store keysets file
702 StoreKeySetFile(TotalGraph, configuration->configpath);
703
704 // store Adjacency file
705 char *filename = Malloc<char> (MAXSTRINGSIZE, "molecule::FragmentMolecule - *filename");
706 strcpy(filename, FRAGMENTPREFIX);
707 strcat(filename, ADJACENCYFILE);
708 StoreAdjacencyToFile(configuration->configpath, filename);
709 Free(&filename);
710
711 // store Hydrogen saturation correction file
712 BondFragments->AddHydrogenCorrection(configuration->configpath);
713
714 // store adaptive orders into file
715 StoreOrderAtSiteFile(configuration->configpath);
716
717 // restore orbital and Stop values
718 CalculateOrbitals(*configuration);
719
720 // free memory for bond part
721 Log() << Verbose(1) << "Freeing bond memory" << endl;
722 delete(FragmentList); // remove bond molecule from memory
723 Free(&SortIndex);
724 } else {
725 Log() << Verbose(1) << "FragmentList is zero on return, splitting failed." << endl;
726 }
727 delete(BondFragments);
728 Log() << Verbose(0) << "End of bond fragmentation." << endl;
729
730 return ((int)(!FragmentationToDo)+1); // 1 - continue, 2 - stop (no fragmentation occured)
731};
732
733
734/** Stores pairs (Atom::nr, Atom::AdaptiveOrder) into file.
735 * Atoms not present in the file get "-1".
736 * \param *out output stream for debugging
737 * \param *path path to file ORDERATSITEFILE
738 * \return true - file writable, false - not writable
739 */
740bool molecule::StoreOrderAtSiteFile(char *path)
741{
742 stringstream line;
743 ofstream file;
744
745 line << path << "/" << FRAGMENTPREFIX << ORDERATSITEFILE;
746 file.open(line.str().c_str());
747 Log() << Verbose(1) << "Writing OrderAtSite " << ORDERATSITEFILE << " ... " << endl;
748 if (file != NULL) {
749 ActOnAllAtoms( &atom::OutputOrder, &file );
750 file.close();
751 Log() << Verbose(1) << "done." << endl;
752 return true;
753 } else {
754 Log() << Verbose(1) << "failed to open file " << line.str() << "." << endl;
755 return false;
756 }
757};
758
759/** Parses pairs(Atom::nr, Atom::AdaptiveOrder) from file and stores in molecule's Atom's.
760 * Atoms not present in the file get "0".
761 * \param *out output stream for debugging
762 * \param *path path to file ORDERATSITEFILEe
763 * \return true - file found and scanned, false - file not found
764 * \sa ParseKeySetFile() and CheckAdjacencyFileAgainstMolecule() as this is meant to be used in conjunction with the two
765 */
766bool molecule::ParseOrderAtSiteFromFile(char *path)
767{
768 unsigned char *OrderArray = Calloc<unsigned char>(AtomCount, "molecule::ParseOrderAtSiteFromFile - *OrderArray");
769 bool *MaxArray = Calloc<bool>(AtomCount, "molecule::ParseOrderAtSiteFromFile - *MaxArray");
770 bool status;
771 int AtomNr, value;
772 stringstream line;
773 ifstream file;
774
775 Log() << Verbose(1) << "Begin of ParseOrderAtSiteFromFile" << endl;
776 line << path << "/" << FRAGMENTPREFIX << ORDERATSITEFILE;
777 file.open(line.str().c_str());
778 if (file != NULL) {
779 while (!file.eof()) { // parse from file
780 AtomNr = -1;
781 file >> AtomNr;
782 if (AtomNr != -1) { // test whether we really parsed something (this is necessary, otherwise last atom is set twice and to 0 on second time)
783 file >> value;
784 OrderArray[AtomNr] = value;
785 file >> value;
786 MaxArray[AtomNr] = value;
787 //Log() << Verbose(2) << "AtomNr " << AtomNr << " with order " << (int)OrderArray[AtomNr] << " and max order set to " << (int)MaxArray[AtomNr] << "." << endl;
788 }
789 }
790 file.close();
791
792 // set atom values
793 SetAtomValueToIndexedArray( OrderArray, &atom::nr, &atom::AdaptiveOrder );
794 SetAtomValueToIndexedArray( MaxArray, &atom::nr, &atom::MaxOrder );
795
796 Log() << Verbose(1) << "done." << endl;
797 status = true;
798 } else {
799 Log() << Verbose(1) << "failed to open file " << line.str() << "." << endl;
800 status = false;
801 }
802 Free(&OrderArray);
803 Free(&MaxArray);
804
805 Log() << Verbose(1) << "End of ParseOrderAtSiteFromFile" << endl;
806 return status;
807};
808
809
810
811/** Looks through a StackClass<atom *> and returns the likeliest removal candiate.
812 * \param *out output stream for debugging messages
813 * \param *&Leaf KeySet to look through
814 * \param *&ShortestPathList list of the shortest path to decide which atom to suggest as removal candidate in the end
815 * \param index of the atom suggested for removal
816 */
817int molecule::LookForRemovalCandidate(KeySet *&Leaf, int *&ShortestPathList)
818{
819 atom *Runner = NULL;
820 int SP, Removal;
821
822 Log() << Verbose(2) << "Looking for removal candidate." << endl;
823 SP = -1; //0; // not -1, so that Root is never removed
824 Removal = -1;
825 for (KeySet::iterator runner = Leaf->begin(); runner != Leaf->end(); runner++) {
826 Runner = FindAtom((*runner));
827 if (Runner->type->Z != 1) { // skip all those added hydrogens when re-filling snake stack
828 if (ShortestPathList[(*runner)] > SP) { // remove the oldest one with longest shortest path
829 SP = ShortestPathList[(*runner)];
830 Removal = (*runner);
831 }
832 }
833 }
834 return Removal;
835};
836
837/** Initializes some value for putting fragment of \a *mol into \a *Leaf.
838 * \param *mol total molecule
839 * \param *Leaf fragment molecule
840 * \param &Leaflet pointer to KeySet structure
841 * \param **SonList calloc'd list which atom of \a *Leaf is a son of which atom in \a *mol
842 * \return number of atoms in fragment
843 */
844int StoreFragmentFromKeySet_Init(molecule *mol, molecule *Leaf, KeySet &Leaflet, atom **SonList)
845{
846 atom *FatherOfRunner = NULL;
847
848 Leaf->BondDistance = mol->BondDistance;
849 for(int i=NDIM*2;i--;)
850 Leaf->cell_size[i] = mol->cell_size[i];
851
852 // first create the minimal set of atoms from the KeySet
853 int size = 0;
854 for(KeySet::iterator runner = Leaflet.begin(); runner != Leaflet.end(); runner++) {
855 FatherOfRunner = mol->FindAtom((*runner)); // find the id
856 SonList[FatherOfRunner->nr] = Leaf->AddCopyAtom(FatherOfRunner);
857 size++;
858 }
859 return size;
860};
861
862/** Creates an induced subgraph out of a fragmental key set, adding bonds and hydrogens (if treated specially).
863 * \param *out output stream for debugging messages
864 * \param *mol total molecule
865 * \param *Leaf fragment molecule
866 * \param IsAngstroem whether we have Ansgtroem or bohrradius
867 * \param **SonList list which atom of \a *Leaf is a son of which atom in \a *mol
868 */
869void CreateInducedSubgraphOfFragment(molecule *mol, molecule *Leaf, atom **SonList, bool IsAngstroem)
870{
871 bool LonelyFlag = false;
872 atom *OtherFather = NULL;
873 atom *FatherOfRunner = NULL;
874 Leaf->CountAtoms();
875
876 atom *Runner = Leaf->start;
877 while (Runner->next != Leaf->end) {
878 Runner = Runner->next;
879 LonelyFlag = true;
880 FatherOfRunner = Runner->father;
881 if (SonList[FatherOfRunner->nr] != NULL) { // check if this, our father, is present in list
882 // create all bonds
883 for (BondList::const_iterator BondRunner = FatherOfRunner->ListOfBonds.begin(); BondRunner != FatherOfRunner->ListOfBonds.end(); (++BondRunner)) {
884 OtherFather = (*BondRunner)->GetOtherAtom(FatherOfRunner);
885// Log() << Verbose(2) << "Father " << *FatherOfRunner << " of son " << *SonList[FatherOfRunner->nr] << " is bound to " << *OtherFather;
886 if (SonList[OtherFather->nr] != NULL) {
887// Log() << Verbose(0) << ", whose son is " << *SonList[OtherFather->nr] << "." << endl;
888 if (OtherFather->nr > FatherOfRunner->nr) { // add bond (nr check is for adding only one of both variants: ab, ba)
889// Log() << Verbose(3) << "Adding Bond: ";
890// Log() << Verbose(0) <<
891 Leaf->AddBond(Runner, SonList[OtherFather->nr], (*BondRunner)->BondDegree);
892// Log() << Verbose(0) << "." << endl;
893 //NumBonds[Runner->nr]++;
894 } else {
895// Log() << Verbose(3) << "Not adding bond, labels in wrong order." << endl;
896 }
897 LonelyFlag = false;
898 } else {
899// Log() << Verbose(0) << ", who has no son in this fragment molecule." << endl;
900#ifdef ADDHYDROGEN
901 //Log() << Verbose(3) << "Adding Hydrogen to " << Runner->Name << " and a bond in between." << endl;
902 if(!Leaf->AddHydrogenReplacementAtom((*BondRunner), Runner, FatherOfRunner, OtherFather, IsAngstroem))
903 exit(1);
904#endif
905 //NumBonds[Runner->nr] += Binder->BondDegree;
906 }
907 }
908 } else {
909 eLog() << Verbose(1) << "Son " << Runner->Name << " has father " << FatherOfRunner->Name << " but its entry in SonList is " << SonList[FatherOfRunner->nr] << "!" << endl;
910 }
911 if ((LonelyFlag) && (Leaf->AtomCount > 1)) {
912 Log() << Verbose(0) << *Runner << "has got bonds only to hydrogens!" << endl;
913 }
914#ifdef ADDHYDROGEN
915 while ((Runner->next != Leaf->end) && (Runner->next->type->Z == 1)) // skip added hydrogen
916 Runner = Runner->next;
917#endif
918 }
919};
920
921/** Stores a fragment from \a KeySet into \a molecule.
922 * First creates the minimal set of atoms from the KeySet, then creates the bond structure from the complete
923 * molecule and adds missing hydrogen where bonds were cut.
924 * \param *out output stream for debugging messages
925 * \param &Leaflet pointer to KeySet structure
926 * \param IsAngstroem whether we have Ansgtroem or bohrradius
927 * \return pointer to constructed molecule
928 */
929molecule * molecule::StoreFragmentFromKeySet(KeySet &Leaflet, bool IsAngstroem)
930{
931 atom **SonList = Calloc<atom*>(AtomCount, "molecule::StoreFragmentFromStack: **SonList");
932 molecule *Leaf = new molecule(elemente);
933
934// Log() << Verbose(1) << "Begin of StoreFragmentFromKeyset." << endl;
935 StoreFragmentFromKeySet_Init(this, Leaf, Leaflet, SonList);
936 // create the bonds between all: Make it an induced subgraph and add hydrogen
937// Log() << Verbose(2) << "Creating bonds from father graph (i.e. induced subgraph creation)." << endl;
938 CreateInducedSubgraphOfFragment(this, Leaf, SonList, IsAngstroem);
939
940 //Leaflet->Leaf->ScanForPeriodicCorrection(out);
941 Free(&SonList);
942// Log() << Verbose(1) << "End of StoreFragmentFromKeyset." << endl;
943 return Leaf;
944};
945
946
947/** Clears the touched list
948 * \param *out output stream for debugging
949 * \param verbosity verbosity level
950 * \param *&TouchedList touched list
951 * \param SubOrder current suborder
952 * \param TouchedIndex currently touched
953 */
954void SPFragmentGenerator_ClearingTouched(int verbosity, int *&TouchedList, int SubOrder, int &TouchedIndex)
955{
956 Log() << Verbose(1+verbosity) << "Clearing touched list." << endl;
957 for (TouchedIndex=SubOrder+1;TouchedIndex--;) // empty touched list
958 TouchedList[TouchedIndex] = -1;
959 TouchedIndex = 0;
960
961}
962
963/** Adds the current combination of the power set to the snake stack.
964 * \param *out output stream for debugging
965 * \param verbosity verbosity level
966 * \param CurrentCombination
967 * \param SetDimension maximum number of bits in power set
968 * \param *FragmentSet snake stack to remove from
969 * \param *&TouchedList touched list
970 * \param TouchedIndex currently touched
971 * \return number of set bits
972 */
973int AddPowersetToSnakeStack(int verbosity, int CurrentCombination, int SetDimension, KeySet *FragmentSet, bond **BondsSet, int *&TouchedList, int &TouchedIndex)
974{
975 atom *OtherWalker = NULL;
976 bool bit = false;
977 KeySetTestPair TestKeySetInsert;
978
979 int Added = 0;
980 for (int j=0;j<SetDimension;j++) { // pull out every bit by shifting
981 bit = ((CurrentCombination & (1 << j)) != 0); // mask the bit for the j-th bond
982 if (bit) { // if bit is set, we add this bond partner
983 OtherWalker = BondsSet[j]->rightatom; // rightatom is always the one more distant, i.e. the one to add
984 //Log() << Verbose(1+verbosity) << "Current Bond is " << BondsSet[j] << ", checking on " << *OtherWalker << "." << endl;
985 Log() << Verbose(2+verbosity) << "Adding " << *OtherWalker << " with nr " << OtherWalker->nr << "." << endl;
986 TestKeySetInsert = FragmentSet->insert(OtherWalker->nr);
987 if (TestKeySetInsert.second) {
988 TouchedList[TouchedIndex++] = OtherWalker->nr; // note as added
989 Added++;
990 } else {
991 Log() << Verbose(2+verbosity) << "This was item was already present in the keyset." << endl;
992 }
993 } else {
994 Log() << Verbose(2+verbosity) << "Not adding." << endl;
995 }
996 }
997 return Added;
998};
999
1000/** Counts the number of elements in a power set.
1001 * \param *SetFirst
1002 * \param *SetLast
1003 * \param *&TouchedList touched list
1004 * \param TouchedIndex currently touched
1005 * \return number of elements
1006 */
1007int CountSetMembers(bond *SetFirst, bond *SetLast, int *&TouchedList, int TouchedIndex)
1008{
1009 int SetDimension = 0;
1010 bond *Binder = SetFirst; // start node for this level
1011 while (Binder->next != SetLast) { // compare to end node of this level
1012 Binder = Binder->next;
1013 for (int k=TouchedIndex;k--;) {
1014 if (Binder->Contains(TouchedList[k])) // if we added this very endpiece
1015 SetDimension++;
1016 }
1017 }
1018 return SetDimension;
1019};
1020
1021/** Counts the number of elements in a power set.
1022 * \param *BondsList bonds list to fill
1023 * \param *SetFirst
1024 * \param *SetLast
1025 * \param *&TouchedList touched list
1026 * \param TouchedIndex currently touched
1027 * \return number of elements
1028 */
1029int FillBondsList(bond **BondsList, bond *SetFirst, bond *SetLast, int *&TouchedList, int TouchedIndex)
1030{
1031 int SetDimension = 0;
1032 bond *Binder = SetFirst; // start node for this level
1033 while (Binder->next != SetLast) { // compare to end node of this level
1034 Binder = Binder->next;
1035 for (int k=0;k<TouchedIndex;k++) {
1036 if (Binder->leftatom->nr == TouchedList[k]) // leftatom is always the close one
1037 BondsList[SetDimension++] = Binder;
1038 }
1039 }
1040 return SetDimension;
1041};
1042
1043/** Remove all items that were added on this SP level.
1044 * \param *out output stream for debugging
1045 * \param verbosity verbosity level
1046 * \param *FragmentSet snake stack to remove from
1047 * \param *&TouchedList touched list
1048 * \param TouchedIndex currently touched
1049 */
1050void RemoveAllTouchedFromSnakeStack(int verbosity, KeySet *FragmentSet, int *&TouchedList, int &TouchedIndex)
1051{
1052 int Removal = 0;
1053 for(int j=0;j<TouchedIndex;j++) {
1054 Removal = TouchedList[j];
1055 Log() << Verbose(2+verbosity) << "Removing item nr. " << Removal << " from snake stack." << endl;
1056 FragmentSet->erase(Removal);
1057 TouchedList[j] = -1;
1058 }
1059 Log() << Verbose(2) << "Remaining local nr.s on snake stack are: ";
1060 for(KeySet::iterator runner = FragmentSet->begin(); runner != FragmentSet->end(); runner++)
1061 Log() << Verbose(0) << (*runner) << " ";
1062 Log() << Verbose(0) << endl;
1063 TouchedIndex = 0; // set Index to 0 for list of atoms added on this level
1064};
1065
1066/** From a given set of Bond sorted by Shortest Path distance, create all possible fragments of size \a SetDimension.
1067 * -# loops over every possible combination (2^dimension of edge set)
1068 * -# inserts current set, if there's still space left
1069 * -# yes: calls SPFragmentGenerator with structure, created new edge list and size respective to root dist
1070ance+1
1071 * -# no: stores fragment into keyset list by calling InsertFragmentIntoGraph
1072 * -# removes all items added into the snake stack (in UniqueFragments structure) added during level (root
1073distance) and current set
1074 * \param *out output stream for debugging
1075 * \param FragmentSearch UniqueFragments structure with all values needed
1076 * \param RootDistance current shortest path level, whose set of edges is represented by **BondsSet
1077 * \param SetDimension Number of possible bonds on this level (i.e. size of the array BondsSet[])
1078 * \param SubOrder remaining number of allowed vertices to add
1079 */
1080void molecule::SPFragmentGenerator(struct UniqueFragments *FragmentSearch, int RootDistance, bond **BondsSet, int SetDimension, int SubOrder)
1081{
1082 int verbosity = 0; //FragmentSearch->ANOVAOrder-SubOrder;
1083 int NumCombinations;
1084 int bits, TouchedIndex, SubSetDimension, SP, Added;
1085 int SpaceLeft;
1086 int *TouchedList = Malloc<int>(SubOrder + 1, "molecule::SPFragmentGenerator: *TouchedList");
1087 bond **BondsList = NULL;
1088 KeySetTestPair TestKeySetInsert;
1089
1090 NumCombinations = 1 << SetDimension;
1091
1092 // here for all bonds of Walker all combinations of end pieces (from the bonds)
1093 // have to be added and for the remaining ANOVA order GraphCrawler be called
1094 // recursively for the next level
1095
1096 Log() << Verbose(1+verbosity) << "Begin of SPFragmentGenerator." << endl;
1097 Log() << Verbose(1+verbosity) << "We are " << RootDistance << " away from Root, which is " << *FragmentSearch->Root << ", SubOrder is " << SubOrder << ", SetDimension is " << SetDimension << " and this means " << NumCombinations-1 << " combination(s)." << endl;
1098
1099 // initialised touched list (stores added atoms on this level)
1100 SPFragmentGenerator_ClearingTouched(verbosity, TouchedList, SubOrder, TouchedIndex);
1101
1102 // create every possible combination of the endpieces
1103 Log() << Verbose(1+verbosity) << "Going through all combinations of the power set." << endl;
1104 for (int i=1;i<NumCombinations;i++) { // sweep through all power set combinations (skip empty set!)
1105 // count the set bit of i
1106 bits = 0;
1107 for (int j=SetDimension;j--;)
1108 bits += (i & (1 << j)) >> j;
1109
1110 Log() << Verbose(1+verbosity) << "Current set is " << Binary(i | (1 << SetDimension)) << ", number of bits is " << bits << "." << endl;
1111 if (bits <= SubOrder) { // if not greater than additional atoms allowed on stack, continue
1112 // --1-- add this set of the power set of bond partners to the snake stack
1113 Added = AddPowersetToSnakeStack(verbosity, i, SetDimension, FragmentSearch->FragmentSet, BondsSet, TouchedList, TouchedIndex);
1114
1115 SpaceLeft = SubOrder - Added ;// SubOrder - bits; // due to item's maybe being already present, this does not work anymore
1116 if (SpaceLeft > 0) {
1117 Log() << Verbose(1+verbosity) << "There's still some space left on stack: " << SpaceLeft << "." << endl;
1118 if (SubOrder > 1) { // Due to Added above we have to check extra whether we're not already reaching beyond the desired Order
1119 // --2-- look at all added end pieces of this combination, construct bond subsets and sweep through a power set of these by recursion
1120 SP = RootDistance+1; // this is the next level
1121
1122 // first count the members in the subset
1123 SubSetDimension = CountSetMembers(FragmentSearch->BondsPerSPList[2*SP], FragmentSearch->BondsPerSPList[2*SP+1], TouchedList, TouchedIndex);
1124
1125 // then allocate and fill the list
1126 BondsList = Malloc<bond*>(SubSetDimension, "molecule::SPFragmentGenerator: **BondsList");
1127 SubSetDimension = FillBondsList(BondsList, FragmentSearch->BondsPerSPList[2*SP], FragmentSearch->BondsPerSPList[2*SP+1], TouchedList, TouchedIndex);
1128
1129 // then iterate
1130 Log() << Verbose(2+verbosity) << "Calling subset generator " << SP << " away from root " << *FragmentSearch->Root << " with sub set dimension " << SubSetDimension << "." << endl;
1131 SPFragmentGenerator(FragmentSearch, SP, BondsList, SubSetDimension, SubOrder-bits);
1132
1133 Free(&BondsList);
1134 }
1135 } else {
1136 // --2-- otherwise store the complete fragment
1137 Log() << Verbose(1+verbosity) << "Enough items on stack for a fragment!" << endl;
1138 // store fragment as a KeySet
1139 Log() << Verbose(2) << "Found a new fragment[" << FragmentSearch->FragmentCounter << "], local nr.s are: ";
1140 for(KeySet::iterator runner = FragmentSearch->FragmentSet->begin(); runner != FragmentSearch->FragmentSet->end(); runner++)
1141 Log() << Verbose(0) << (*runner) << " ";
1142 Log() << Verbose(0) << endl;
1143 //if (!CheckForConnectedSubgraph(FragmentSearch->FragmentSet))
1144 //eLog() << Verbose(1) << "The found fragment is not a connected subgraph!" << endl;
1145 InsertFragmentIntoGraph(FragmentSearch);
1146 }
1147
1148 // --3-- remove all added items in this level from snake stack
1149 Log() << Verbose(1+verbosity) << "Removing all items that were added on this SP level " << RootDistance << "." << endl;
1150 RemoveAllTouchedFromSnakeStack(verbosity, FragmentSearch->FragmentSet, TouchedList, TouchedIndex);
1151 } else {
1152 Log() << Verbose(2+verbosity) << "More atoms to add for this set (" << bits << ") than space left on stack " << SubOrder << ", skipping this set." << endl;
1153 }
1154 }
1155 Free(&TouchedList);
1156 Log() << Verbose(1+verbosity) << "End of SPFragmentGenerator, " << RootDistance << " away from Root " << *FragmentSearch->Root << " and SubOrder is " << SubOrder << "." << endl;
1157};
1158
1159/** Allocates memory for UniqueFragments::BondsPerSPList.
1160 * \param *out output stream
1161 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1162 * \param FragmentSearch UniqueFragments
1163 * \sa FreeSPList()
1164 */
1165void InitialiseSPList(int Order, struct UniqueFragments &FragmentSearch)
1166{
1167 FragmentSearch.BondsPerSPList = Malloc<bond*>(Order * 2, "molecule::PowerSetGenerator: ***BondsPerSPList");
1168 FragmentSearch.BondsPerSPCount = Malloc<int>(Order, "molecule::PowerSetGenerator: *BondsPerSPCount");
1169 for (int i=Order;i--;) {
1170 FragmentSearch.BondsPerSPList[2*i] = new bond(); // start node
1171 FragmentSearch.BondsPerSPList[2*i+1] = new bond(); // end node
1172 FragmentSearch.BondsPerSPList[2*i]->next = FragmentSearch.BondsPerSPList[2*i+1]; // intertwine these two
1173 FragmentSearch.BondsPerSPList[2*i+1]->previous = FragmentSearch.BondsPerSPList[2*i];
1174 FragmentSearch.BondsPerSPCount[i] = 0;
1175 }
1176};
1177
1178/** Free's memory for for UniqueFragments::BondsPerSPList.
1179 * \param *out output stream
1180 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1181 * \param FragmentSearch UniqueFragments\
1182 * \sa InitialiseSPList()
1183 */
1184void FreeSPList(int Order, struct UniqueFragments &FragmentSearch)
1185{
1186 Free(&FragmentSearch.BondsPerSPCount);
1187 for (int i=Order;i--;) {
1188 delete(FragmentSearch.BondsPerSPList[2*i]);
1189 delete(FragmentSearch.BondsPerSPList[2*i+1]);
1190 }
1191 Free(&FragmentSearch.BondsPerSPList);
1192};
1193
1194/** Sets FragmenSearch to initial value.
1195 * Sets UniqueFragments::ShortestPathList entries to zero, UniqueFragments::BondsPerSPCount to zero (except zero level to 1) and
1196 * adds initial bond UniqueFragments::Root to UniqueFragments::Root to UniqueFragments::BondsPerSPList
1197 * \param *out output stream
1198 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1199 * \param FragmentSearch UniqueFragments
1200 * \sa FreeSPList()
1201 */
1202void SetSPList(int Order, struct UniqueFragments &FragmentSearch)
1203{
1204 // prepare Label and SP arrays of the BFS search
1205 FragmentSearch.ShortestPathList[FragmentSearch.Root->nr] = 0;
1206
1207 // prepare root level (SP = 0) and a loop bond denoting Root
1208 for (int i=Order;i--;)
1209 FragmentSearch.BondsPerSPCount[i] = 0;
1210 FragmentSearch.BondsPerSPCount[0] = 1;
1211 bond *Binder = new bond(FragmentSearch.Root, FragmentSearch.Root);
1212 add(Binder, FragmentSearch.BondsPerSPList[1]);
1213};
1214
1215/** Resets UniqueFragments::ShortestPathList and cleans bonds from UniqueFragments::BondsPerSPList.
1216 * \param *out output stream
1217 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1218 * \param FragmentSearch UniqueFragments
1219 * \sa InitialiseSPList()
1220 */
1221void ResetSPList(int Order, struct UniqueFragments &FragmentSearch)
1222{
1223 bond *Binder = NULL;
1224 Log() << Verbose(0) << "Free'ing all found lists. and resetting index lists" << endl;
1225 for(int i=Order;i--;) {
1226 Log() << Verbose(1) << "Current SP level is " << i << ": ";
1227 Binder = FragmentSearch.BondsPerSPList[2*i];
1228 while (Binder->next != FragmentSearch.BondsPerSPList[2*i+1]) {
1229 Binder = Binder->next;
1230 // Log() << Verbose(0) << "Removing atom " << Binder->leftatom->nr << " and " << Binder->rightatom->nr << "." << endl; // make sure numbers are local
1231 FragmentSearch.ShortestPathList[Binder->leftatom->nr] = -1;
1232 FragmentSearch.ShortestPathList[Binder->rightatom->nr] = -1;
1233 }
1234 // delete added bonds
1235 cleanup(FragmentSearch.BondsPerSPList[2*i], FragmentSearch.BondsPerSPList[2*i+1]);
1236 // also start and end node
1237 Log() << Verbose(0) << "cleaned." << endl;
1238 }
1239};
1240
1241
1242/** Fills the Bonds per Shortest Path List and set the vertex labels.
1243 * \param *out output stream
1244 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1245 * \param FragmentSearch UniqueFragments
1246 * \param *mol molecule with atoms and bonds
1247 * \param RestrictedKeySet Restricted vertex set to use in context of molecule
1248 */
1249void FillSPListandLabelVertices(int Order, struct UniqueFragments &FragmentSearch, molecule *mol, KeySet RestrictedKeySet)
1250{
1251 // Actually, we should construct a spanning tree vom the root atom and select all edges therefrom and put them into
1252 // according shortest path lists. However, we don't. Rather we fill these lists right away, as they do form a spanning
1253 // tree already sorted into various SP levels. That's why we just do loops over the depth (CurrentSP) and breadth
1254 // (EdgeinSPLevel) of this tree ...
1255 // In another picture, the bonds always contain a direction by rightatom being the one more distant from root and hence
1256 // naturally leftatom forming its predecessor, preventing the BFS"seeker" from continuing in the wrong direction.
1257 int AtomKeyNr = -1;
1258 atom *Walker = NULL;
1259 atom *OtherWalker = NULL;
1260 atom *Predecessor = NULL;
1261 bond *CurrentEdge = NULL;
1262 bond *Binder = NULL;
1263 int RootKeyNr = FragmentSearch.Root->GetTrueFather()->nr;
1264 int RemainingWalkers = -1;
1265 int SP = -1;
1266
1267 Log() << Verbose(0) << "Starting BFS analysis ..." << endl;
1268 for (SP = 0; SP < (Order-1); SP++) {
1269 Log() << Verbose(1) << "New SP level reached: " << SP << ", creating new SP list with " << FragmentSearch.BondsPerSPCount[SP] << " item(s)";
1270 if (SP > 0) {
1271 Log() << Verbose(0) << ", old level closed with " << FragmentSearch.BondsPerSPCount[SP-1] << " item(s)." << endl;
1272 FragmentSearch.BondsPerSPCount[SP] = 0;
1273 } else
1274 Log() << Verbose(0) << "." << endl;
1275
1276 RemainingWalkers = FragmentSearch.BondsPerSPCount[SP];
1277 CurrentEdge = FragmentSearch.BondsPerSPList[2*SP]; /// start of this SP level's list
1278 while (CurrentEdge->next != FragmentSearch.BondsPerSPList[2*SP+1]) { /// end of this SP level's list
1279 CurrentEdge = CurrentEdge->next;
1280 RemainingWalkers--;
1281 Walker = CurrentEdge->rightatom; // rightatom is always the one more distant
1282 Predecessor = CurrentEdge->leftatom; // ... and leftatom is predecessor
1283 AtomKeyNr = Walker->nr;
1284 Log() << Verbose(0) << "Current Walker is: " << *Walker << " with nr " << Walker->nr << " and SP of " << SP << ", with " << RemainingWalkers << " remaining walkers on this level." << endl;
1285 // check for new sp level
1286 // go through all its bonds
1287 Log() << Verbose(1) << "Going through all bonds of Walker." << endl;
1288 for (BondList::const_iterator Runner = Walker->ListOfBonds.begin(); Runner != Walker->ListOfBonds.end(); (++Runner)) {
1289 OtherWalker = (*Runner)->GetOtherAtom(Walker);
1290 if ((RestrictedKeySet.find(OtherWalker->nr) != RestrictedKeySet.end())
1291 #ifdef ADDHYDROGEN
1292 && (OtherWalker->type->Z != 1)
1293 #endif
1294 ) { // skip hydrogens and restrict to fragment
1295 Log() << Verbose(2) << "Current partner is " << *OtherWalker << " with nr " << OtherWalker->nr << " in bond " << *(*Runner) << "." << endl;
1296 // set the label if not set (and push on root stack as well)
1297 if ((OtherWalker != Predecessor) && (OtherWalker->GetTrueFather()->nr > RootKeyNr)) { // only pass through those with label bigger than Root's
1298 FragmentSearch.ShortestPathList[OtherWalker->nr] = SP+1;
1299 Log() << Verbose(3) << "Set Shortest Path to " << FragmentSearch.ShortestPathList[OtherWalker->nr] << "." << endl;
1300 // add the bond in between to the SP list
1301 Binder = new bond(Walker, OtherWalker); // create a new bond in such a manner, that bond::rightatom is always the one more distant
1302 add(Binder, FragmentSearch.BondsPerSPList[2*(SP+1)+1]);
1303 FragmentSearch.BondsPerSPCount[SP+1]++;
1304 Log() << Verbose(3) << "Added its bond to SP list, having now " << FragmentSearch.BondsPerSPCount[SP+1] << " item(s)." << endl;
1305 } else {
1306 if (OtherWalker != Predecessor)
1307 Log() << Verbose(3) << "Not passing on, as index of " << *OtherWalker << " " << OtherWalker->GetTrueFather()->nr << " is smaller than that of Root " << RootKeyNr << "." << endl;
1308 else
1309 Log() << Verbose(3) << "This is my predecessor " << *Predecessor << "." << endl;
1310 }
1311 } else Log() << Verbose(2) << "Is not in the restricted keyset or skipping hydrogen " << *OtherWalker << "." << endl;
1312 }
1313 }
1314 }
1315};
1316
1317/** prints the Bonds per Shortest Path list in UniqueFragments.
1318 * \param *out output stream
1319 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1320 * \param FragmentSearch UniqueFragments
1321 */
1322void OutputSPList(int Order, struct UniqueFragments &FragmentSearch)
1323{
1324 bond *Binder = NULL;
1325 Log() << Verbose(0) << "Printing all found lists." << endl;
1326 for(int i=1;i<Order;i++) { // skip the root edge in the printing
1327 Binder = FragmentSearch.BondsPerSPList[2*i];
1328 Log() << Verbose(1) << "Current SP level is " << i << "." << endl;
1329 while (Binder->next != FragmentSearch.BondsPerSPList[2*i+1]) {
1330 Binder = Binder->next;
1331 Log() << Verbose(2) << *Binder << endl;
1332 }
1333 }
1334};
1335
1336/** Simply counts all bonds in all UniqueFragments::BondsPerSPList lists.
1337 * \param *out output stream
1338 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1339 * \param FragmentSearch UniqueFragments
1340 */
1341int CountNumbersInBondsList(int Order, struct UniqueFragments &FragmentSearch)
1342{
1343 bond *Binder = NULL;
1344 int SP = -1; // the Root <-> Root edge must be subtracted!
1345 for(int i=Order;i--;) { // sum up all found edges
1346 Binder = FragmentSearch.BondsPerSPList[2*i];
1347 while (Binder->next != FragmentSearch.BondsPerSPList[2*i+1]) {
1348 Binder = Binder->next;
1349 SP++;
1350 }
1351 }
1352 return SP;
1353};
1354
1355/** Creates a list of all unique fragments of certain vertex size from a given graph \a Fragment for a given root vertex in the context of \a this molecule.
1356 * -# initialises UniqueFragments structure
1357 * -# fills edge list via BFS
1358 * -# creates the fragment by calling recursive function SPFragmentGenerator with UniqueFragments structure, 0 as
1359 root distance, the edge set, its dimension and the current suborder
1360 * -# Free'ing structure
1361 * Note that we may use the fact that the atoms are SP-ordered on the atomstack. I.e. when popping always the last, we first get all
1362 * with SP of 2, then those with SP of 3, then those with SP of 4 and so on.
1363 * \param *out output stream for debugging
1364 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
1365 * \param FragmentSearch UniqueFragments structure containing TEFactor, root atom and so on
1366 * \param RestrictedKeySet Restricted vertex set to use in context of molecule
1367 * \return number of inserted fragments
1368 * \note ShortestPathList in FragmentSearch structure is probably due to NumberOfAtomsSPLevel and SP not needed anymore
1369 */
1370int molecule::PowerSetGenerator(int Order, struct UniqueFragments &FragmentSearch, KeySet RestrictedKeySet)
1371{
1372 bond **BondsList = NULL;
1373 int Counter = FragmentSearch.FragmentCounter; // mark current value of counter
1374
1375 Log() << Verbose(0) << endl;
1376 Log() << Verbose(0) << "Begin of PowerSetGenerator with order " << Order << " at Root " << *FragmentSearch.Root << "." << endl;
1377
1378 SetSPList(Order, FragmentSearch);
1379
1380 // do a BFS search to fill the SP lists and label the found vertices
1381 FillSPListandLabelVertices(Order, FragmentSearch, this, RestrictedKeySet);
1382
1383 // outputting all list for debugging
1384 OutputSPList(Order, FragmentSearch);
1385
1386 // creating fragments with the found edge sets (may be done in reverse order, faster)
1387 int SP = CountNumbersInBondsList(Order, FragmentSearch);
1388 Log() << Verbose(0) << "Total number of edges is " << SP << "." << endl;
1389 if (SP >= (Order-1)) {
1390 // start with root (push on fragment stack)
1391 Log() << Verbose(0) << "Starting fragment generation with " << *FragmentSearch.Root << ", local nr is " << FragmentSearch.Root->nr << "." << endl;
1392 FragmentSearch.FragmentSet->clear();
1393 Log() << Verbose(0) << "Preparing subset for this root and calling generator." << endl;
1394
1395 // prepare the subset and call the generator
1396 BondsList = Calloc<bond*>(FragmentSearch.BondsPerSPCount[0], "molecule::PowerSetGenerator: **BondsList");
1397 BondsList[0] = FragmentSearch.BondsPerSPList[0]->next; // on SP level 0 there's only the root bond
1398
1399 SPFragmentGenerator(&FragmentSearch, 0, BondsList, FragmentSearch.BondsPerSPCount[0], Order);
1400
1401 Free(&BondsList);
1402 } else {
1403 Log() << Verbose(0) << "Not enough total number of edges to build " << Order << "-body fragments." << endl;
1404 }
1405
1406 // as FragmentSearch structure is used only once, we don't have to clean it anymore
1407 // remove root from stack
1408 Log() << Verbose(0) << "Removing root again from stack." << endl;
1409 FragmentSearch.FragmentSet->erase(FragmentSearch.Root->nr);
1410
1411 // free'ing the bonds lists
1412 ResetSPList(Order, FragmentSearch);
1413
1414 // return list
1415 Log() << Verbose(0) << "End of PowerSetGenerator." << endl;
1416 return (FragmentSearch.FragmentCounter - Counter);
1417};
1418
1419bool KeyCompare::operator() (const KeySet SubgraphA, const KeySet SubgraphB) const
1420{
1421 //Log() << Verbose(0) << "my check is used." << endl;
1422 if (SubgraphA.size() < SubgraphB.size()) {
1423 return true;
1424 } else {
1425 if (SubgraphA.size() > SubgraphB.size()) {
1426 return false;
1427 } else {
1428 KeySet::iterator IteratorA = SubgraphA.begin();
1429 KeySet::iterator IteratorB = SubgraphB.begin();
1430 while ((IteratorA != SubgraphA.end()) && (IteratorB != SubgraphB.end())) {
1431 if ((*IteratorA) < (*IteratorB))
1432 return true;
1433 else if ((*IteratorA) > (*IteratorB)) {
1434 return false;
1435 } // else, go on to next index
1436 IteratorA++;
1437 IteratorB++;
1438 } // end of while loop
1439 }// end of check in case of equal sizes
1440 }
1441 return false; // if we reach this point, they are equal
1442};
1443
1444
1445/** Combines all KeySets from all orders into single ones (with just unique entries).
1446 * \param *out output stream for debugging
1447 * \param *&FragmentList list to fill
1448 * \param ***FragmentLowerOrdersList
1449 * \param &RootStack stack with all root candidates (unequal to each atom in complete molecule if adaptive scheme is applied)
1450 * \param *mol molecule with atoms and bonds
1451 */
1452int CombineAllOrderListIntoOne(Graph *&FragmentList, Graph ***FragmentLowerOrdersList, KeyStack &RootStack, molecule *mol)
1453{
1454 int RootNr = 0;
1455 int RootKeyNr = 0;
1456 int StartNr = 0;
1457 int counter = 0;
1458 int NumLevels = 0;
1459 atom *Walker = NULL;
1460
1461 Log() << Verbose(0) << "Combining the lists of all orders per order and finally into a single one." << endl;
1462 if (FragmentList == NULL) {
1463 FragmentList = new Graph;
1464 counter = 0;
1465 } else {
1466 counter = FragmentList->size();
1467 }
1468
1469 StartNr = RootStack.back();
1470 do {
1471 RootKeyNr = RootStack.front();
1472 RootStack.pop_front();
1473 Walker = mol->FindAtom(RootKeyNr);
1474 NumLevels = 1 << (Walker->AdaptiveOrder - 1);
1475 for(int i=0;i<NumLevels;i++) {
1476 if (FragmentLowerOrdersList[RootNr][i] != NULL) {
1477 InsertGraphIntoGraph(*FragmentList, (*FragmentLowerOrdersList[RootNr][i]), &counter);
1478 }
1479 }
1480 RootStack.push_back(Walker->nr);
1481 RootNr++;
1482 } while (RootKeyNr != StartNr);
1483 return counter;
1484};
1485
1486/** Free's memory allocated for all KeySets from all orders.
1487 * \param *out output stream for debugging
1488 * \param ***FragmentLowerOrdersList
1489 * \param &RootStack stack with all root candidates (unequal to each atom in complete molecule if adaptive scheme is applied)
1490 * \param *mol molecule with atoms and bonds
1491 */
1492void FreeAllOrdersList(Graph ***FragmentLowerOrdersList, KeyStack &RootStack, molecule *mol)
1493{
1494 Log() << Verbose(1) << "Free'ing the lists of all orders per order." << endl;
1495 int RootNr = 0;
1496 int RootKeyNr = 0;
1497 int NumLevels = 0;
1498 atom *Walker = NULL;
1499 while (!RootStack.empty()) {
1500 RootKeyNr = RootStack.front();
1501 RootStack.pop_front();
1502 Walker = mol->FindAtom(RootKeyNr);
1503 NumLevels = 1 << (Walker->AdaptiveOrder - 1);
1504 for(int i=0;i<NumLevels;i++) {
1505 if (FragmentLowerOrdersList[RootNr][i] != NULL) {
1506 delete(FragmentLowerOrdersList[RootNr][i]);
1507 }
1508 }
1509 Free(&FragmentLowerOrdersList[RootNr]);
1510 RootNr++;
1511 }
1512 Free(&FragmentLowerOrdersList);
1513};
1514
1515
1516/** Performs BOSSANOVA decomposition at selected sites, increasing the cutoff by one at these sites.
1517 * -# constructs a complete keyset of the molecule
1518 * -# In a loop over all possible roots from the given rootstack
1519 * -# increases order of root site
1520 * -# calls PowerSetGenerator with this order, the complete keyset and the rootkeynr
1521 * -# for all consecutive lower levels PowerSetGenerator is called with the suborder, the higher order keyset
1522as the restricted one and each site in the set as the root)
1523 * -# these are merged into a fragment list of keysets
1524 * -# All fragment lists (for all orders, i.e. from all destination fields) are merged into one list for return
1525 * Important only is that we create all fragments, it is not important if we create them more than once
1526 * as these copies are filtered out via use of the hash table (KeySet).
1527 * \param *out output stream for debugging
1528 * \param Fragment&*List list of already present keystacks (adaptive scheme) or empty list
1529 * \param &RootStack stack with all root candidates (unequal to each atom in complete molecule if adaptive scheme is applied)
1530 * \param *MinimumRingSize minimum ring size for each atom (molecule::Atomcount)
1531 * \return pointer to Graph list
1532 */
1533void molecule::FragmentBOSSANOVA(Graph *&FragmentList, KeyStack &RootStack, int *MinimumRingSize)
1534{
1535 Graph ***FragmentLowerOrdersList = NULL;
1536 int NumLevels = 0;
1537 int NumMolecules = 0;
1538 int TotalNumMolecules = 0;
1539 int *NumMoleculesOfOrder = NULL;
1540 int Order = 0;
1541 int UpgradeCount = RootStack.size();
1542 KeyStack FragmentRootStack;
1543 int RootKeyNr = 0;
1544 int RootNr = 0;
1545 struct UniqueFragments FragmentSearch;
1546
1547 Log() << Verbose(0) << "Begin of FragmentBOSSANOVA." << endl;
1548
1549 // FragmentLowerOrdersList is a 2D-array of pointer to MoleculeListClass objects, one dimension represents the ANOVA expansion of a single order (i.e. 5)
1550 // with all needed lower orders that are subtracted, the other dimension is the BondOrder (i.e. from 1 to 5)
1551 NumMoleculesOfOrder = Calloc<int>(UpgradeCount, "molecule::FragmentBOSSANOVA: *NumMoleculesOfOrder");
1552 FragmentLowerOrdersList = Calloc<Graph**>(UpgradeCount, "molecule::FragmentBOSSANOVA: ***FragmentLowerOrdersList");
1553
1554 // initialise the fragments structure
1555 FragmentSearch.FragmentCounter = 0;
1556 FragmentSearch.FragmentSet = new KeySet;
1557 FragmentSearch.Root = FindAtom(RootKeyNr);
1558 FragmentSearch.ShortestPathList = Malloc<int>(AtomCount, "molecule::PowerSetGenerator: *ShortestPathList");
1559 for (int i=AtomCount;i--;) {
1560 FragmentSearch.ShortestPathList[i] = -1;
1561 }
1562
1563 // Construct the complete KeySet which we need for topmost level only (but for all Roots)
1564 atom *Walker = start;
1565 KeySet CompleteMolecule;
1566 while (Walker->next != end) {
1567 Walker = Walker->next;
1568 CompleteMolecule.insert(Walker->GetTrueFather()->nr);
1569 }
1570
1571 // this can easily be seen: if Order is 5, then the number of levels for each lower order is the total sum of the number of levels above, as
1572 // each has to be split up. E.g. for the second level we have one from 5th, one from 4th, two from 3th (which in turn is one from 5th, one from 4th),
1573 // hence we have overall four 2th order levels for splitting. This also allows for putting all into a single array (FragmentLowerOrdersList[])
1574 // with the order along the cells as this: 5433222211111111 for BondOrder 5 needing 16=pow(2,5-1) cells (only we use bit-shifting which is faster)
1575 RootNr = 0; // counts through the roots in RootStack
1576 while ((RootNr < UpgradeCount) && (!RootStack.empty())) {
1577 RootKeyNr = RootStack.front();
1578 RootStack.pop_front();
1579 Walker = FindAtom(RootKeyNr);
1580 // check cyclic lengths
1581 //if ((MinimumRingSize[Walker->GetTrueFather()->nr] != -1) && (Walker->GetTrueFather()->AdaptiveOrder+1 > MinimumRingSize[Walker->GetTrueFather()->nr])) {
1582 // Log() << Verbose(0) << "Bond order " << Walker->GetTrueFather()->AdaptiveOrder << " of Root " << *Walker << " greater than or equal to Minimum Ring size of " << MinimumRingSize << " found is not allowed." << endl;
1583 //} else
1584 {
1585 // increase adaptive order by one
1586 Walker->GetTrueFather()->AdaptiveOrder++;
1587 Order = Walker->AdaptiveOrder = Walker->GetTrueFather()->AdaptiveOrder;
1588
1589 // initialise Order-dependent entries of UniqueFragments structure
1590 InitialiseSPList(Order, FragmentSearch);
1591
1592 // allocate memory for all lower level orders in this 1D-array of ptrs
1593 NumLevels = 1 << (Order-1); // (int)pow(2,Order);
1594 FragmentLowerOrdersList[RootNr] = Calloc<Graph*>(NumLevels, "molecule::FragmentBOSSANOVA: **FragmentLowerOrdersList[]");
1595
1596 // create top order where nothing is reduced
1597 Log() << Verbose(0) << "==============================================================================================================" << endl;
1598 Log() << Verbose(0) << "Creating KeySets of Bond Order " << Order << " for " << *Walker << ", " << (RootStack.size()-RootNr) << " Roots remaining." << endl; // , NumLevels is " << NumLevels << "
1599
1600 // Create list of Graphs of current Bond Order (i.e. F_{ij})
1601 FragmentLowerOrdersList[RootNr][0] = new Graph;
1602 FragmentSearch.TEFactor = 1.;
1603 FragmentSearch.Leaflet = FragmentLowerOrdersList[RootNr][0]; // set to insertion graph
1604 FragmentSearch.Root = Walker;
1605 NumMoleculesOfOrder[RootNr] = PowerSetGenerator(Walker->AdaptiveOrder, FragmentSearch, CompleteMolecule);
1606
1607 // output resulting number
1608 Log() << Verbose(1) << "Number of resulting KeySets is: " << NumMoleculesOfOrder[RootNr] << "." << endl;
1609 if (NumMoleculesOfOrder[RootNr] != 0) {
1610 NumMolecules = 0;
1611 } else {
1612 Walker->GetTrueFather()->MaxOrder = true;
1613 }
1614 // now, we have completely filled each cell of FragmentLowerOrdersList[] for the current Walker->AdaptiveOrder
1615 //NumMoleculesOfOrder[Walker->AdaptiveOrder-1] = NumMolecules;
1616 TotalNumMolecules += NumMoleculesOfOrder[RootNr];
1617// Log() << Verbose(1) << "Number of resulting molecules for Order " << (int)Walker->GetTrueFather()->AdaptiveOrder << " is: " << NumMoleculesOfOrder[RootNr] << "." << endl;
1618 RootStack.push_back(RootKeyNr); // put back on stack
1619 RootNr++;
1620
1621 // free Order-dependent entries of UniqueFragments structure for next loop cycle
1622 FreeSPList(Order, FragmentSearch);
1623 }
1624 }
1625 Log() << Verbose(0) << "==============================================================================================================" << endl;
1626 Log() << Verbose(1) << "Total number of resulting molecules is: " << TotalNumMolecules << "." << endl;
1627 Log() << Verbose(0) << "==============================================================================================================" << endl;
1628
1629 // cleanup FragmentSearch structure
1630 Free(&FragmentSearch.ShortestPathList);
1631 delete(FragmentSearch.FragmentSet);
1632
1633 // now, FragmentLowerOrdersList is complete, it looks - for BondOrder 5 - as this (number is the ANOVA Order of the terms therein)
1634 // 5433222211111111
1635 // 43221111
1636 // 3211
1637 // 21
1638 // 1
1639
1640 // Subsequently, we combine all into a single list (FragmentList)
1641 CombineAllOrderListIntoOne(FragmentList, FragmentLowerOrdersList, RootStack, this);
1642 FreeAllOrdersList(FragmentLowerOrdersList, RootStack, this);
1643 Free(&NumMoleculesOfOrder);
1644
1645 Log() << Verbose(0) << "End of FragmentBOSSANOVA." << endl;
1646};
1647
1648/** Corrects the nuclei position if the fragment was created over the cell borders.
1649 * Scans all bonds, checks the distance, if greater than typical, we have a candidate for the correction.
1650 * We remove the bond whereafter the graph probably separates. Then, we translate the one component periodically
1651 * and re-add the bond. Looping on the distance check.
1652 * \param *out ofstream for debugging messages
1653 */
1654void molecule::ScanForPeriodicCorrection()
1655{
1656 bond *Binder = NULL;
1657 bond *OtherBinder = NULL;
1658 atom *Walker = NULL;
1659 atom *OtherWalker = NULL;
1660 double *matrix = ReturnFullMatrixforSymmetric(cell_size);
1661 enum Shading *ColorList = NULL;
1662 double tmp;
1663 Vector Translationvector;
1664 //class StackClass<atom *> *CompStack = NULL;
1665 class StackClass<atom *> *AtomStack = new StackClass<atom *>(AtomCount);
1666 bool flag = true;
1667
1668 Log() << Verbose(2) << "Begin of ScanForPeriodicCorrection." << endl;
1669
1670 ColorList = Calloc<enum Shading>(AtomCount, "molecule::ScanForPeriodicCorrection: *ColorList");
1671 while (flag) {
1672 // remove bonds that are beyond bonddistance
1673 for(int i=NDIM;i--;)
1674 Translationvector.x[i] = 0.;
1675 // scan all bonds
1676 Binder = first;
1677 flag = false;
1678 while ((!flag) && (Binder->next != last)) {
1679 Binder = Binder->next;
1680 for (int i=NDIM;i--;) {
1681 tmp = fabs(Binder->leftatom->x.x[i] - Binder->rightatom->x.x[i]);
1682 //Log() << Verbose(3) << "Checking " << i << "th distance of " << *Binder->leftatom << " to " << *Binder->rightatom << ": " << tmp << "." << endl;
1683 if (tmp > BondDistance) {
1684 OtherBinder = Binder->next; // note down binding partner for later re-insertion
1685 unlink(Binder); // unlink bond
1686 Log() << Verbose(2) << "Correcting at bond " << *Binder << "." << endl;
1687 flag = true;
1688 break;
1689 }
1690 }
1691 }
1692 if (flag) {
1693 // create translation vector from their periodically modified distance
1694 for (int i=NDIM;i--;) {
1695 tmp = Binder->leftatom->x.x[i] - Binder->rightatom->x.x[i];
1696 if (fabs(tmp) > BondDistance)
1697 Translationvector.x[i] = (tmp < 0) ? +1. : -1.;
1698 }
1699 Translationvector.MatrixMultiplication(matrix);
1700 //Log() << Verbose(3) << "Translation vector is ";
1701 Translationvector.Output();
1702 Log() << Verbose(0) << endl;
1703 // apply to all atoms of first component via BFS
1704 for (int i=AtomCount;i--;)
1705 ColorList[i] = white;
1706 AtomStack->Push(Binder->leftatom);
1707 while (!AtomStack->IsEmpty()) {
1708 Walker = AtomStack->PopFirst();
1709 //Log() << Verbose (3) << "Current Walker is: " << *Walker << "." << endl;
1710 ColorList[Walker->nr] = black; // mark as explored
1711 Walker->x.AddVector(&Translationvector); // translate
1712 for (BondList::const_iterator Runner = Walker->ListOfBonds.begin(); Runner != Walker->ListOfBonds.end(); (++Runner)) {
1713 if ((*Runner) != Binder) {
1714 OtherWalker = (*Runner)->GetOtherAtom(Walker);
1715 if (ColorList[OtherWalker->nr] == white) {
1716 AtomStack->Push(OtherWalker); // push if yet unexplored
1717 }
1718 }
1719 }
1720 }
1721 // re-add bond
1722 link(Binder, OtherBinder);
1723 } else {
1724 Log() << Verbose(3) << "No corrections for this fragment." << endl;
1725 }
1726 //delete(CompStack);
1727 }
1728
1729 // free allocated space from ReturnFullMatrixforSymmetric()
1730 delete(AtomStack);
1731 Free(&ColorList);
1732 Free(&matrix);
1733 Log() << Verbose(2) << "End of ScanForPeriodicCorrection." << endl;
1734};
Note: See TracBrowser for help on using the repository browser.