/*
 * Project: MoleCuilder
 * Description: creates and alters molecular systems
 * Copyright (C)  2010 University of Bonn. All rights reserved.
 * Please see the LICENSE file or "Copyright notice" in builder.cpp for details.
 */

/*
 * fragmentation_helpers.cpp
 *
 *  Created on: Oct 18, 2011
 *      Author: heber
 */

// include config.h
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "CodePatterns/MemDebug.hpp"

#include "fragmentation_helpers.hpp"

#include <sstream>

#include "CodePatterns/Log.hpp"

#include "atom.hpp"
#include "Bond/bond.hpp"
#include "Element/element.hpp"
#include "Fragmentation/KeySet.hpp"
#include "Helpers/defs.hpp"
#include "Helpers/helpers.hpp"
#include "molecule.hpp"

using namespace std;

/** Scans a single line for number and puts them into \a KeySet.
 * \param *out output stream for debugging
 * \param *buffer buffer to scan
 * \param &CurrentSet filled KeySet on return
 * \return true - at least one valid atom id parsed, false - CurrentSet is empty
 */
bool ScanBufferIntoKeySet(char *buffer, KeySet &CurrentSet)
{
  stringstream line;
  int AtomNr;
  int status = 0;

  line.str(buffer);
  while (!line.eof()) {
    line >> AtomNr;
    if (AtomNr >= 0) {
      CurrentSet.insert(AtomNr);  // insert at end, hence in same order as in file!
      status++;
    } // else it's "-1" or else and thus must not be added
  }
  DoLog(1) && (Log() << Verbose(1) << "The scanned KeySet is ");
  for(KeySet::iterator runner = CurrentSet.begin(); runner != CurrentSet.end(); runner++) {
    DoLog(0) && (Log() << Verbose(0) << (*runner) << "\t");
  }
  DoLog(0) && (Log() << Verbose(0) << endl);
  return (status != 0);
};

/** Parses the KeySet file and fills \a *FragmentList from the known molecule structure.
 * Does two-pass scanning:
 * -# Scans the keyset file and initialises a temporary graph
 * -# Scans TEFactors file and sets the TEFactor of each key set in the temporary graph accordingly
 * Finally, the temporary graph is inserted into the given \a FragmentList for return.
 * \param &path path to file
 * \param *FragmentList empty, filled on return
 * \return true - parsing successfully, false - failure on parsing (FragmentList will be NULL)
 */
bool ParseKeySetFile(std::string &path, Graph *&FragmentList)
{
  bool status = true;
  ifstream InputFile;
  stringstream line;
  GraphTestPair testGraphInsert;
  int NumberOfFragments = 0;
  string filename;

  if (FragmentList == NULL) { // check list pointer
    FragmentList = new Graph;
  }

  // 1st pass: open file and read
  DoLog(1) && (Log() << Verbose(1) << "Parsing the KeySet file ... " << endl);
  filename = path + KEYSETFILE;
  InputFile.open(filename.c_str());
  if (InputFile.good()) {
    // each line represents a new fragment
    char buffer[MAXSTRINGSIZE];
    // 1. parse keysets and insert into temp. graph
    while (!InputFile.eof()) {
      InputFile.getline(buffer, MAXSTRINGSIZE);
      KeySet CurrentSet;
      if ((strlen(buffer) > 0) && (ScanBufferIntoKeySet(buffer, CurrentSet))) {  // if at least one valid atom was added, write config
        testGraphInsert = FragmentList->insert(GraphPair (CurrentSet,pair<int,double>(NumberOfFragments++,1)));  // store fragment number and current factor
        if (!testGraphInsert.second) {
          DoeLog(0) && (eLog()<< Verbose(0) << "KeySet file must be corrupt as there are two equal key sets therein!" << endl);
          performCriticalExit();
        }
      }
    }
    // 2. Free and done
    InputFile.close();
    InputFile.clear();
    DoLog(1) && (Log() << Verbose(1) << "\t ... done." << endl);
  } else {
    DoLog(1) && (Log() << Verbose(1) << "\t ... File " << filename << " not found." << endl);
    status = false;
  }

  return status;
};

/** Parses the TE factors file and fills \a *FragmentList from the known molecule structure.
 * -# Scans TEFactors file and sets the TEFactor of each key set in the temporary graph accordingly
 * \param *out output stream for debugging
 * \param *path path to file
 * \param *FragmentList graph whose nodes's TE factors are set on return
 * \return true - parsing successfully, false - failure on parsing
 */
bool ParseTEFactorsFile(char *path, Graph *FragmentList)
{
  bool status = true;
  ifstream InputFile;
  stringstream line;
  GraphTestPair testGraphInsert;
  int NumberOfFragments = 0;
  double TEFactor;
  char filename[MAXSTRINGSIZE];

  if (FragmentList == NULL) { // check list pointer
    FragmentList = new Graph;
  }

  // 2nd pass: open TEFactors file and read
  DoLog(1) && (Log() << Verbose(1) << "Parsing the TEFactors file ... " << endl);
  sprintf(filename, "%s/%s%s", path, FRAGMENTPREFIX, TEFACTORSFILE);
  InputFile.open(filename);
  if (InputFile != NULL) {
    // 3. add found TEFactors to each keyset
    NumberOfFragments = 0;
    for(Graph::iterator runner = FragmentList->begin();runner != FragmentList->end(); runner++) {
      if (!InputFile.eof()) {
        InputFile >> TEFactor;
        (*runner).second.second = TEFactor;
        DoLog(2) && (Log() << Verbose(2) << "Setting " << ++NumberOfFragments << " fragment's TEFactor to " << (*runner).second.second << "." << endl);
      } else {
        status = false;
        break;
      }
    }
    // 4. Free and done
    InputFile.close();
    DoLog(1) && (Log() << Verbose(1) << "done." << endl);
  } else {
    DoLog(1) && (Log() << Verbose(1) << "File " << filename << " not found." << endl);
    status = false;
  }

  return status;
};

/** Stores key sets to file.
 * \param KeySetList Graph with Keysets
 * \param &path path to file
 * \return true - file written successfully, false - writing failed
 */
bool StoreKeySetFile(Graph &KeySetList, std::string &path)
{
  bool status =  true;
  string line = path + KEYSETFILE;
  ofstream output(line.c_str());

  // open KeySet file
  DoLog(1) && (Log() << Verbose(1) << "Saving key sets of the total graph ... ");
  if(output.good()) {
    for(Graph::iterator runner = KeySetList.begin(); runner != KeySetList.end(); runner++) {
      for (KeySet::iterator sprinter = (*runner).first.begin();sprinter != (*runner).first.end(); sprinter++) {
        if (sprinter != (*runner).first.begin())
          output << "\t";
        output << *sprinter;
      }
      output << endl;
    }
    DoLog(0) && (Log() << Verbose(0) << "done." << endl);
  } else {
    DoeLog(0) && (eLog()<< Verbose(0) << "Unable to open " << line << " for writing keysets!" << endl);
    performCriticalExit();
    status = false;
  }
  output.close();
  output.clear();

  return status;
};


/** Stores TEFactors to file.
 * \param *out output stream for debugging
 * \param KeySetList Graph with factors
 * \param *path path to file
 * \return true - file written successfully, false - writing failed
 */
bool StoreTEFactorsFile(Graph &KeySetList, char *path)
{
  ofstream output;
  bool status =  true;
  string line;

  // open TEFactors file
  line = path;
  line.append("/");
  line += FRAGMENTPREFIX;
  line += TEFACTORSFILE;
  output.open(line.c_str(), ios::out);
  DoLog(1) && (Log() << Verbose(1) << "Saving TEFactors of the total graph ... ");
  if(output != NULL) {
    for(Graph::iterator runner = KeySetList.begin(); runner != KeySetList.end(); runner++)
      output << (*runner).second.second << endl;
    DoLog(1) && (Log() << Verbose(1) << "done." << endl);
  } else {
    DoLog(1) && (Log() << Verbose(1) << "failed to open " << line << "." << endl);
    status = false;
  }
  output.close();

  return status;
};

/** For a given graph, sorts KeySets into a (index, keyset) map.
 * \param *GlobalKeySetList list of keysets with global ids (valid in "this" molecule) needed for adaptive increase
 * \return map from index to keyset
 */
std::map<int,KeySet> * GraphToIndexedKeySet(Graph *GlobalKeySetList)
{
  map<int,KeySet> *IndexKeySetList = new map<int,KeySet>;
  for(Graph::iterator runner = GlobalKeySetList->begin(); runner != GlobalKeySetList->end(); runner++) {
    IndexKeySetList->insert( pair<int,KeySet>(runner->second.first,runner->first) );
  }
  return IndexKeySetList;
};

/** Inserts a (\a No, \a value) pair into the list, overwriting present one.
 * Note if values are equal, No will decided on which is first
 * \param *out output stream for debugging
 * \param &AdaptiveCriteriaList list to insert into
 * \param &IndexedKeySetList list to find key set for a given index \a No
 * \param FragOrder current bond order of fragment
 * \param No index of keyset
 * \param value energy value
 */
void InsertIntoAdaptiveCriteriaList(std::map<int, pair<double,int> > *AdaptiveCriteriaList, std::map<int,KeySet> &IndexKeySetList, int FragOrder, int No, double Value)
{
  map<int,KeySet>::iterator marker = IndexKeySetList.find(No);    // find keyset to Frag No.
  if (marker != IndexKeySetList.end()) {  // if found
    Value *= 1 + MYEPSILON*(*((*marker).second.begin()));     // in case of equal energies this makes them not equal without changing anything actually
    // as the smallest number in each set has always been the root (we use global id to keep the doubles away), seek smallest and insert into AtomMask
    pair <map<int, pair<double,int> >::iterator, bool> InsertedElement = AdaptiveCriteriaList->insert( make_pair(*((*marker).second.begin()), pair<double,int>( fabs(Value), FragOrder) ));
    map<int, pair<double,int> >::iterator PresentItem = InsertedElement.first;
    if (!InsertedElement.second) { // this root is already present
      if ((*PresentItem).second.second < FragOrder)  // if order there is lower, update entry with higher-order term
        //if ((*PresentItem).second.first < (*runner).first)    // as higher-order terms are not always better, we skip this part (which would always include this site into adaptive increase)
        {  // if value is smaller, update value and order
        (*PresentItem).second.first = fabs(Value);
        (*PresentItem).second.second = FragOrder;
        DoLog(2) && (Log() << Verbose(2) << "Updated element (" <<  (*PresentItem).first << ",[" << (*PresentItem).second.first << "," << (*PresentItem).second.second << "])." << endl);
      } else {
        DoLog(2) && (Log() << Verbose(2) << "Did not update element " <<  (*PresentItem).first << " as " << FragOrder << " is less than or equal to " << (*PresentItem).second.second << "." << endl);
      }
    } else {
      DoLog(2) && (Log() << Verbose(2) << "Inserted element (" <<  (*PresentItem).first << ",[" << (*PresentItem).second.first << "," << (*PresentItem).second.second << "])." << endl);
    }
  } else {
    DoLog(1) && (Log() << Verbose(1) << "No Fragment under No. " << No << "found." << endl);
  }
};

/** Counts lines in file.
 * Note we are scanning lines from current position, not from beginning.
 * \param InputFile file to be scanned.
 */
int CountLinesinFile(std::ifstream &InputFile)
{
  char *buffer = new char[MAXSTRINGSIZE];
  int lines=0;

  int PositionMarker = InputFile.tellg();  // not needed as Inputfile is copied, given by value, not by ref
  // count the number of lines, i.e. the number of fragments
  InputFile.getline(buffer, MAXSTRINGSIZE); // skip comment lines
  InputFile.getline(buffer, MAXSTRINGSIZE);
  while(!InputFile.eof()) {
    InputFile.getline(buffer, MAXSTRINGSIZE);
    lines++;
  }
  InputFile.seekg(PositionMarker, ios::beg);
  delete[](buffer);
  return lines;
};


/** Scans the adaptive order file and insert (index, value) into map.
 * \param &path path to ENERGYPERFRAGMENT file (may be NULL if Order is non-negative)
 * \param &IndexedKeySetList list to find key set for a given index \a No
 * \return adaptive criteria list from file
 */
std::map<int, std::pair<double,int> > * ScanAdaptiveFileIntoMap(std::string &path, std::map<int,KeySet> &IndexKeySetList)
{
  map<int, pair<double,int> > *AdaptiveCriteriaList = new map<int, pair<double,int> >;
  int No = 0, FragOrder = 0;
  double Value = 0.;
  char buffer[MAXSTRINGSIZE];
  string filename = path + ENERGYPERFRAGMENT;
  ifstream InputFile(filename.c_str());

  if (InputFile.fail()) {
    DoeLog(1) && (eLog() << Verbose(1) << "Cannot find file " << filename << "." << endl);
    return AdaptiveCriteriaList;
  }

  if (CountLinesinFile(InputFile) > 0) {
    // each line represents a fragment root (Atom::Nr) id and its energy contribution
    InputFile.getline(buffer, MAXSTRINGSIZE); // skip comment lines
    InputFile.getline(buffer, MAXSTRINGSIZE);
    while(!InputFile.eof()) {
      InputFile.getline(buffer, MAXSTRINGSIZE);
      if (strlen(buffer) > 2) {
        //Log() << Verbose(2) << "Scanning: " << buffer << endl;
        stringstream line(buffer);
        line >> FragOrder;
        line >> ws >> No;
        line >> ws >> Value; // skip time entry
        line >> ws >> Value;
        No -= 1;  // indices start at 1 in file, not 0
        //Log() << Verbose(2) << " - yields (" << No << "," << Value << ", " << FragOrder << ")" << endl;

        // clean the list of those entries that have been superceded by higher order terms already
        InsertIntoAdaptiveCriteriaList(AdaptiveCriteriaList, IndexKeySetList, FragOrder, No, Value);
      }
    }
    // close and done
    InputFile.close();
    InputFile.clear();
  }

  return AdaptiveCriteriaList;
};

/** Maps adaptive criteria list back onto (Value, (Root Nr., Order))
 * (i.e. sorted by value to pick the highest ones)
 * \param *out output stream for debugging
 * \param &AdaptiveCriteriaList list to insert into
 * \param *mol molecule with atoms
 * \return remapped list
 */
std::map<double, std::pair<int,int> >  * ReMapAdaptiveCriteriaListToValue(std::map<int, std::pair<double,int> > *AdaptiveCriteriaList, molecule *mol)
{
  atom *Walker = NULL;
  map<double, pair<int,int> > *FinalRootCandidates = new map<double, pair<int,int> > ;
  DoLog(1) && (Log() << Verbose(1) << "Root candidate list is: " << endl);
  for(map<int, pair<double,int> >::iterator runner = AdaptiveCriteriaList->begin(); runner != AdaptiveCriteriaList->end(); runner++) {
    Walker = mol->FindAtom((*runner).first);
    if (Walker != NULL) {
      //if ((*runner).second.second >= Walker->AdaptiveOrder) { // only insert if this is an "active" root site for the current order
      if (!Walker->MaxOrder) {
        DoLog(2) && (Log() << Verbose(2) << "(" << (*runner).first << ",[" << (*runner).second.first << "," << (*runner).second.second << "])" << endl);
        FinalRootCandidates->insert( make_pair( (*runner).second.first, pair<int,int>((*runner).first, (*runner).second.second) ) );
      } else {
        DoLog(2) && (Log() << Verbose(2) << "Excluding (" << *Walker << ", " << (*runner).first << ",[" << (*runner).second.first << "," << (*runner).second.second << "]), as it has reached its maximum order." << endl);
      }
    } else {
      DoeLog(0) && (eLog()<< Verbose(0) << "Atom No. " << (*runner).second.first << " was not found in this molecule." << endl);
      performCriticalExit();
    }
  }
  return FinalRootCandidates;
};

/** Marks all candidate sites for update if below adaptive threshold.
 * Picks a given number of highest values and set *AtomMask to true.
 * \param *out output stream for debugging
 * \param *AtomMask defines true/false per global Atom::Nr to mask in/out each nuclear site, used to activate given number of site to increment order adaptively
 * \param FinalRootCandidates list candidates to check
 * \param Order desired order
 * \param *mol molecule with atoms
 * \return true - if update is necessary, false - not
 */
bool MarkUpdateCandidates(bool *AtomMask, std::map<double, std::pair<int,int> > &FinalRootCandidates, int Order, molecule *mol)
{
  atom *Walker = NULL;
  int No = -1;
  bool status = false;
  for(map<double, pair<int,int> >::iterator runner = FinalRootCandidates.upper_bound(pow(10.,Order)); runner != FinalRootCandidates.end(); runner++) {
    No = (*runner).second.first;
    Walker = mol->FindAtom(No);
    //if (Walker->AdaptiveOrder < MinimumRingSize[Walker->getNr()]) {
      DoLog(2) && (Log() << Verbose(2) << "Root " << No << " is still above threshold (10^{" << Order <<"}: " << runner->first << ", setting entry " << No << " of Atom mask to true." << endl);
      AtomMask[No] = true;
      status = true;
    //} else
      //Log() << Verbose(2) << "Root " << No << " is still above threshold (10^{" << Order <<"}: " << runner->first << ", however MinimumRingSize of " << MinimumRingSize[Walker->getNr()] << " does not allow further adaptive increase." << endl;
  }
  return status;
};

/** print atom mask for debugging.
 * \param *out output stream for debugging
 * \param *AtomMask defines true/false per global Atom::Nr to mask in/out each nuclear site, used to activate given number of site to increment order adaptively
 * \param AtomCount number of entries in \a *AtomMask
 */
void PrintAtomMask(bool *AtomMask, int AtomCount)
{
  DoLog(2) && (Log() << Verbose(2) << "              ");
  for(int i=0;i<AtomCount;i++)
    DoLog(0) && (Log() << Verbose(0) << (i % 10));
  DoLog(0) && (Log() << Verbose(0) << endl);
  DoLog(2) && (Log() << Verbose(2) << "Atom mask is: ");
  for(int i=0;i<AtomCount;i++)
    DoLog(0) && (Log() << Verbose(0) << (AtomMask[i] ? "t" : "f"));
  DoLog(0) && (Log() << Verbose(0) << endl);
};

/** Combines all KeySets from all orders into single ones (with just unique entries).
 * \param *out output stream for debugging
 * \param *&FragmentList list to fill
 * \param ***FragmentLowerOrdersList
 * \param &RootStack stack with all root candidates (unequal to each atom in complete molecule if adaptive scheme is applied)
 * \param *mol molecule with atoms and bonds
 */
int CombineAllOrderListIntoOne(Graph *&FragmentList, Graph ***FragmentLowerOrdersList, KeyStack &RootStack, molecule *mol)
{
  int RootNr = 0;
  int RootKeyNr = 0;
  int StartNr = 0;
  int counter = 0;
  int NumLevels = 0;
  atom *Walker = NULL;

  DoLog(0) && (Log() << Verbose(0) << "Combining the lists of all orders per order and finally into a single one." << endl);
  if (FragmentList == NULL) {
    FragmentList = new Graph;
    counter = 0;
  } else {
    counter = FragmentList->size();
  }

  StartNr = RootStack.back();
  do {
    RootKeyNr = RootStack.front();
    RootStack.pop_front();
    Walker = mol->FindAtom(RootKeyNr);
    NumLevels = 1 << (Walker->AdaptiveOrder - 1);
    for(int i=0;i<NumLevels;i++) {
      if (FragmentLowerOrdersList[RootNr][i] != NULL) {
        InsertGraphIntoGraph(*FragmentList, (*FragmentLowerOrdersList[RootNr][i]), &counter);
      }
    }
    RootStack.push_back(Walker->getNr());
    RootNr++;
  } while (RootKeyNr != StartNr);
  return counter;
};

/** Free's memory allocated for all KeySets from all orders.
 * \param *out output stream for debugging
 * \param ***FragmentLowerOrdersList
 * \param &RootStack stack with all root candidates (unequal to each atom in complete molecule if adaptive scheme is applied)
 * \param *mol molecule with atoms and bonds
 */
void FreeAllOrdersList(Graph ***FragmentLowerOrdersList, KeyStack &RootStack, molecule *mol)
{
  DoLog(1) && (Log() << Verbose(1) << "Free'ing the lists of all orders per order." << endl);
  int RootNr = 0;
  int RootKeyNr = 0;
  int NumLevels = 0;
  atom *Walker = NULL;
  while (!RootStack.empty()) {
    RootKeyNr = RootStack.front();
    RootStack.pop_front();
    Walker = mol->FindAtom(RootKeyNr);
    NumLevels = 1 << (Walker->AdaptiveOrder - 1);
    for(int i=0;i<NumLevels;i++) {
      if (FragmentLowerOrdersList[RootNr][i] != NULL) {
        delete(FragmentLowerOrdersList[RootNr][i]);
      }
    }
    delete[](FragmentLowerOrdersList[RootNr]);
    RootNr++;
  }
  delete[](FragmentLowerOrdersList);
};

