Ignore:
Timestamp:
Feb 24, 2013, 12:58:52 PM (12 years ago)
Author:
Frederik Heber <heber@…>
Branches:
Action_Thermostats, Add_AtomRandomPerturbation, Add_FitFragmentPartialChargesAction, Add_RotateAroundBondAction, Add_SelectAtomByNameAction, Added_ParseSaveFragmentResults, AddingActions_SaveParseParticleParameters, Adding_Graph_to_ChangeBondActions, Adding_MD_integration_tests, Adding_ParticleName_to_Atom, Adding_StructOpt_integration_tests, AtomFragments, Automaking_mpqc_open, AutomationFragmentation_failures, Candidate_v1.5.4, Candidate_v1.6.0, Candidate_v1.6.1, ChangeBugEmailaddress, ChangingTestPorts, ChemicalSpaceEvaluator, CombiningParticlePotentialParsing, Combining_Subpackages, Debian_Package_split, Debian_package_split_molecuildergui_only, Disabling_MemDebug, Docu_Python_wait, EmpiricalPotential_contain_HomologyGraph, EmpiricalPotential_contain_HomologyGraph_documentation, Enable_parallel_make_install, Enhance_userguide, Enhanced_StructuralOptimization, Enhanced_StructuralOptimization_continued, Example_ManyWaysToTranslateAtom, Exclude_Hydrogens_annealWithBondGraph, FitPartialCharges_GlobalError, Fix_BoundInBox_CenterInBox_MoleculeActions, Fix_ChargeSampling_PBC, Fix_ChronosMutex, Fix_FitPartialCharges, Fix_FitPotential_needs_atomicnumbers, Fix_ForceAnnealing, Fix_IndependentFragmentGrids, Fix_ParseParticles, Fix_ParseParticles_split_forward_backward_Actions, Fix_PopActions, Fix_QtFragmentList_sorted_selection, Fix_Restrictedkeyset_FragmentMolecule, Fix_StatusMsg, Fix_StepWorldTime_single_argument, Fix_Verbose_Codepatterns, Fix_fitting_potentials, Fixes, ForceAnnealing_goodresults, ForceAnnealing_oldresults, ForceAnnealing_tocheck, ForceAnnealing_with_BondGraph, ForceAnnealing_with_BondGraph_continued, ForceAnnealing_with_BondGraph_continued_betteresults, ForceAnnealing_with_BondGraph_contraction-expansion, FragmentAction_writes_AtomFragments, FragmentMolecule_checks_bonddegrees, GeometryObjects, Gui_Fixes, Gui_displays_atomic_force_velocity, ImplicitCharges, IndependentFragmentGrids, IndependentFragmentGrids_IndividualZeroInstances, 
IndependentFragmentGrids_IntegrationTest, IndependentFragmentGrids_Sole_NN_Calculation, JobMarket_RobustOnKillsSegFaults, JobMarket_StableWorkerPool, JobMarket_unresolvable_hostname_fix, MoreRobust_FragmentAutomation, ODR_violation_mpqc_open, PartialCharges_OrthogonalSummation, PdbParser_setsAtomName, PythonUI_with_named_parameters, QtGui_reactivate_TimeChanged_changes, Recreated_GuiChecks, Rewrite_FitPartialCharges, RotateToPrincipalAxisSystem_UndoRedo, SaturateAtoms_findBestMatching, SaturateAtoms_singleDegree, StoppableMakroAction, Subpackage_CodePatterns, Subpackage_JobMarket, Subpackage_LinearAlgebra, Subpackage_levmar, Subpackage_mpqc_open, Subpackage_vmg, Switchable_LogView, ThirdParty_MPQC_rebuilt_buildsystem, TrajectoryDependenant_MaxOrder, TremoloParser_IncreasedPrecision, TremoloParser_MultipleTimesteps, TremoloParser_setsAtomName, Ubuntu_1604_changes, stable
Children:
355af8
Parents:
8ea8c8
git-author:
Frederik Heber <heber@…> (10/15/12 05:06:16)
git-committer:
Frederik Heber <heber@…> (02/24/13 12:58:52)
Message:

Added class TrainingData encapsulating the training input and output vectors.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • src/LevMartester.cpp

    r8ea8c8 r4ec18b  
    4040
    4141#include <boost/assign.hpp>
     42#include <boost/bind.hpp>
    4243#include <boost/filesystem.hpp>
     44#include <boost/function.hpp>
    4345#include <boost/program_options.hpp>
    4446
     
    235237  else
    236238    return angle/divisor;
     239}
     240
     241/** Namespace containing all simple extractor functions.
     242 *
     243 */
     244namespace Extractors {
     245  /** Simple extractor of all unique pair distances of a given \a fragment.
     246   *
     247   * \param fragment fragment with all nuclei positions
     248   * \param index index refers to the index within the global set of configurations
     249   * \return vector of of argument_t containing all found distances
     250   */
     251  FunctionModel::arguments_t gatherAllDistances(
     252      const Fragment& fragment,
     253      const size_t index
     254      ) {
     255    // get distance out of Fragment
     256    const Fragment::charges_t charges = fragment.getCharges();
     257    const Fragment::positions_t positions = fragment.getPositions();
     258    return gatherAllDistanceArguments(charges, positions, index);
     259  }
     260
     261  /** Gather first distance for the two matching charges.
     262   *
     263   * \param fragment fragment with all nuclei positions
     264   * \param index index refers to the index within the global set of configurations
     265   * \param firstelement first element of pair
     266   * \param secondelement second element of pair, order is reflected in indices of return argument_t
     267   * \return vector of of argument_t containing all found distances
     268   */
     269  FunctionModel::arguments_t gatherFirstDistance(
     270      const Fragment& fragment,
     271      const size_t index,
     272      const size_t firstelement,
     273      const size_t secondelement
     274      ) {
     275    const Fragment::charges_t charges = fragment.getCharges();
     276    const Fragment::positions_t positions = fragment.getPositions();
     277    typedef Fragment::charges_t::const_iterator chargeiter_t;
     278    std::vector< chargeiter_t > firstpair;
     279    firstpair.reserve(2);
     280    firstpair +=
     281        std::find(charges.begin(), charges.end(), firstelement),
     282        std::find(charges.begin(), charges.end(), secondelement);
     283    if ((firstpair[0] == charges.end()) || (firstpair[1] == charges.end())) {
     284      // complain if tuple not found
     285      ELOG(1, "Could not find pair " << firstelement << "," << secondelement
     286          << " in fragment " << fragment);
     287      return FunctionModel::arguments_t();
     288    }
     289    // convert position_t to Vector
     290    std::vector< std::pair<Vector, size_t> > DistancePair;
     291    for (std::vector<chargeiter_t>::const_iterator firstpairiter = firstpair.begin();
     292        firstpairiter != firstpair.end(); ++firstpairiter) {
     293      Fragment::positions_t::const_iterator positer = positions.begin();
     294      const size_t steps = std::distance(charges.begin(), *firstpairiter);
     295      std::advance(positer, steps);
     296      DistancePair.push_back(
     297          std::make_pair(Vector((*positer)[0], (*positer)[1], (*positer)[2]),
     298              steps));
     299    }
     300    // finally convert Vector pair to distance-like argument
     301    argument_t arg;
     302    arg.indices.first = DistancePair[0].second;
     303    arg.indices.second = DistancePair[1].second;
     304    arg.distance = DistancePair[0].first.distance(DistancePair[1].first);
     305    arg.globalid = index;
     306
     307    return FunctionModel::arguments_t(1, arg);
     308  }
     309
     310}; /* namespace Extractors */
     311
     312/** This class encapsulates the training data for a given potential function
     313 * to learn.
     314 *
     315 * The data is added piece-wise by calling the operator() with a specific
     316 * Fragment.
     317 */
     318class TrainingData
     319{
     320public:
     321  //!> typedef for a range within the HomologyContainer at which fragments to look at
     322  typedef std::pair<
     323      HomologyContainer::const_iterator,
     324      HomologyContainer::const_iterator> range_t;
     325  //!> Training tuple input vector pair
     326  typedef FunctionApproximation::inputs_t InputVector_t;
     327  //!> Training tuple output vector pair
     328  typedef FunctionApproximation::outputs_t OutputVector_t;
     329  //!> Typedef for a function containing how to extract required information from a Fragment.
     330  typedef boost::function< FunctionModel::arguments_t (const Fragment &, const size_t)> extractor_t;
     331
     332public:
     333  /** Constructor for class TrainingData.
     334   *
     335   */
     336  explicit TrainingData(const extractor_t &_extractor) :
     337      extractor(extractor)
     338  {}
     339  /** Destructor for class TrainingData.
     340   *
     341   */
     342  ~TrainingData()
     343  {}
     344
     345  /** We go through the given \a range of homologous fragments and call
     346   * TrainingData::extractor on them in order to gather the distance and
     347   * the energy value, stored internally.
     348   *
     349   * \param range given range within a HomologyContainer of homologous fragments
     350   */
     351  void operator()(const range_t &range) {
     352    for (HomologyContainer::const_iterator iter = range.first; iter != range.second; ++iter) {
     353      // get distance out of Fragment
     354      const Fragment &fragment = iter->second.first;
     355      FunctionModel::arguments_t args = extractor(
     356            fragment,
     357            DistanceVector.size()
     358          );
     359      DistanceVector.push_back( args );
     360      const double &energy = iter->second.second;
     361      EnergyVector.push_back( FunctionModel::results_t(1, energy) );
     362    }
     363  }
     364
     365  /** Getter for const access to internal training data inputs.
     366   *
     367   * \return const ref to training tuple of input vector
     368   */
     369  const InputVector_t& getTrainingInputs() const {
     370    return DistanceVector;
     371  }
     372
     373  /** Getter for const access to internal training data outputs.
     374   *
     375   * \return const ref to training tuple of output vector
     376   */
     377  const OutputVector_t& getTrainingOutputs() const {
     378    return EnergyVector;
     379  }
     380
     381private:
     382  // prohibit use of default constructor, as we always require extraction functor.
     383  TrainingData();
     384
     385private:
     386  //!> private training data vector
     387  InputVector_t DistanceVector;
     388  OutputVector_t EnergyVector;
     389  //!> function to be used for training input data extraction from a fragment
     390  const extractor_t extractor;
     391};
     392
     393// print training data for debugging
     394std::ostream &operator<<(std::ostream &out, const TrainingData &data)
     395{
     396  const TrainingData::InputVector_t &DistanceVector = data.getTrainingInputs();
     397  const TrainingData::OutputVector_t &EnergyVector = data.getTrainingOutputs();
     398  out << "(" << DistanceVector.size()
     399      << "," << EnergyVector.size() << ") data pairs: ";
     400  FunctionApproximation::inputs_t::const_iterator initer = DistanceVector.begin();
     401  FunctionApproximation::outputs_t::const_iterator outiter = EnergyVector.begin();
     402  for (; initer != DistanceVector.end(); ++initer, ++outiter) {
     403    for (size_t index = 0; index < (*initer).size(); ++index)
     404       out << "(" << (*initer)[index].indices.first << "," << (*initer)[index].indices.second
     405          << ") " << (*initer)[index].distance;
     406    out << " with energy " << *outiter;
     407  }
     408  return out;
    237409}
    238410
     
    400572
    401573    // Afterwards we go through all of this type and gather the distance and the energy value
    402     typedef std::pair<
    403         FunctionApproximation::inputs_t,
    404         FunctionApproximation::outputs_t> InputOutputVector_t;
    405     InputOutputVector_t DistanceEnergyVector;
    406     std::pair<HomologyContainer::const_iterator, HomologyContainer::const_iterator> range =
    407         homologies.getHomologousGraphs(graph);
    408     for (HomologyContainer::const_iterator iter = range.first; iter != range.second; ++iter) {
    409       // get distance out of Fragment
    410       const double &energy = iter->second.second;
    411       const Fragment &fragment = iter->second.first;
    412       const Fragment::charges_t charges = fragment.getCharges();
    413       const Fragment::positions_t positions = fragment.getPositions();
    414       std::vector< std::pair<Vector, size_t> > DistanceVectors;
    415       for (Fragment::charges_t::const_iterator chargeiter = charges.begin();
    416           chargeiter != charges.end(); ++chargeiter) {
    417         if (*chargeiter == 6) {
    418           Fragment::positions_t::const_iterator positer = positions.begin();
    419           const size_t steps = std::distance(charges.begin(), chargeiter);
    420           std::advance(positer, steps);
    421           DistanceVectors.push_back(
    422               std::make_pair(Vector((*positer)[0], (*positer)[1], (*positer)[2]),
    423                   steps));
    424         }
    425       }
    426       if (DistanceVectors.size() == (size_t)2) {
    427         argument_t arg;
    428         arg.indices.first = DistanceVectors[0].second;
    429         arg.indices.second = DistanceVectors[1].second;
    430         arg.distance = DistanceVectors[0].first.distance(DistanceVectors[1].first);
    431         arg.globalid = DistanceEnergyVector.first.size();
    432         DistanceEnergyVector.first.push_back( FunctionModel::arguments_t(1,arg) );
    433         DistanceEnergyVector.second.push_back( FunctionModel::results_t(1,energy) );
    434       } else {
    435         ELOG(2, "main() - found not exactly two carbon atoms in fragment "
    436             << fragment << ".");
    437       }
    438     }
    439     // print training data for debugging
    440     {
    441       LOG(1, "INFO: I gathered the following (" << DistanceEnergyVector.first.size()
    442           << "," << DistanceEnergyVector.second.size() << ") data pairs: ");
    443       FunctionApproximation::inputs_t::const_iterator initer = DistanceEnergyVector.first.begin();
    444       FunctionApproximation::outputs_t::const_iterator outiter = DistanceEnergyVector.second.begin();
    445       for (; initer != DistanceEnergyVector.first.end(); ++initer, ++outiter) {
    446         LOG(1, "INFO: (" << (*initer)[0].indices.first << "," << (*initer)[0].indices.second
    447             << ") " << (*initer)[0].distance << " with energy " << *outiter);
    448       }
    449     }
     574    TrainingData MorseData(
     575        boost::bind(&Extractors::gatherFirstDistance, _1, _2, 6, 6) // gather first carbon pair
     576        );
     577    MorseData(homologies.getHomologousGraphs(graph));
     578    LOG(1, "INFO: I gathered the following training data: " << MorseData);
    450579    // NOTICE that distances are in Bohr radii as they come from MPQC!
    451580
     
    459588    morse.setParameters(params);
    460589    FunctionModel &model = morse;
    461     FunctionApproximation approximator(1, 1, model);
    462     approximator.setTrainingData(DistanceEnergyVector.first,DistanceEnergyVector.second);
     590    FunctionApproximation approximator(
     591        MorseData.getTrainingInputs().begin()->size(),
     592        MorseData.getTrainingOutputs().begin()->size(),
     593        model);
     594    approximator.setTrainingData(MorseData.getTrainingInputs(),MorseData.getTrainingOutputs());
    463595    if (model.isBoxConstraint() && approximator.checkParameterDerivatives())
    464596      approximator(FunctionApproximation::ParameterDerivative);
     
    534666        DistanceEnergyVector.first.begin()->size(),
    535667        DistanceEnergyVector.second.begin()->size(),
    536         model); // CH4 has 5 atoms, hence 5*4/2 distances
     668        model);
    537669    approximator.setTrainingData(DistanceEnergyVector.first,DistanceEnergyVector.second);
    538670    if (model.isBoxConstraint() && approximator.checkParameterDerivatives())
Note: See TracChangeset for help on using the changeset viewer.