source: src/Potentials/PotentialTrainer.cpp@ 945797

Action_Thermostats Add_AtomRandomPerturbation Add_FitFragmentPartialChargesAction Add_RotateAroundBondAction Add_SelectAtomByNameAction Added_ParseSaveFragmentResults Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests Automaking_mpqc_open AutomationFragmentation_failures Candidate_v1.5.4 Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChargeSampling_PBC Fix_ChronosMutex Fix_FitPartialCharges Fix_FitPotential_needs_atomicnumbers Fix_ForceAnnealing Fix_IndependentFragmentGrids Fix_ParseParticles Fix_ParseParticles_split_forward_backward_Actions Fix_QtFragmentList_sorted_selection Fix_Restrictedkeyset_FragmentMolecule Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns Fix_fitting_potentials Fixes ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids IndependentFragmentGrids_IndividualZeroInstances IndependentFragmentGrids_IntegrationTest IndependentFragmentGrids_Sole_NN_Calculation JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo SaturateAtoms_findBestMatching SaturateAtoms_singleDegree StoppableMakroAction Subpackage_CodePatterns Subpackage_JobMarket Subpackage_LinearAlgebra Subpackage_levmar Subpackage_mpqc_open Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TrajectoryDependenant_MaxOrder TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable
Last change on this file since 945797 was 945797, checked in by Frederik Heber <heber@…>, 9 years ago

FIX: PotentialTrainer::getFirstGraphwithSpecifiedElements() did not match graph exactly.

  • we just checked for the set of elements to be contained but the given set of elements needs to be matched exactly, e.g. CNH4 contains CH4 but is not what we want.
  • Property mode set to 100644
File size: 8.7 KB
Line 
1/*
2 * Project: MoleCuilder
3 * Description: creates and alters molecular systems
4 * Copyright (C) 2014 Frederik Heber. All rights reserved.
5 *
6 *
7 * This file is part of MoleCuilder.
8 *
9 * MoleCuilder is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * MoleCuilder is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with MoleCuilder. If not, see <http://www.gnu.org/licenses/>.
21 */
22
23/*
24 * PotentialTrainer.cpp
25 *
26 * Created on: Sep 11, 2014
27 * Author: heber
28 */
29
30// include config.h
31#ifdef HAVE_CONFIG_H
32#include <config.h>
33#endif
34
35// needs to come before MemDebug due to placement new
36#include <boost/archive/text_iarchive.hpp>
37
38#include "CodePatterns/MemDebug.hpp"
39
40#include "PotentialTrainer.hpp"
41
42#include <algorithm>
43#include <boost/lambda/lambda.hpp>
44#include <boost/filesystem.hpp>
45#include <fstream>
46#include <sstream>
47
48#include "CodePatterns/Assert.hpp"
49#include "CodePatterns/Log.hpp"
50
51#include "Element/element.hpp"
52#include "Fragmentation/Homology/HomologyContainer.hpp"
53#include "Fragmentation/Homology/HomologyGraph.hpp"
54#include "FunctionApproximation/Extractors.hpp"
55#include "FunctionApproximation/FunctionApproximation.hpp"
56#include "FunctionApproximation/FunctionModel.hpp"
57#include "FunctionApproximation/TrainingData.hpp"
58#include "FunctionApproximation/writeDistanceEnergyTable.hpp"
59#include "Potentials/CompoundPotential.hpp"
60#include "Potentials/RegistrySerializer.hpp"
61#include "Potentials/SerializablePotential.hpp"
62
63PotentialTrainer::PotentialTrainer()
64{}
65
66PotentialTrainer::~PotentialTrainer()
67{}
68
69bool PotentialTrainer::operator()(
70 const HomologyContainer &_homologies,
71 const HomologyGraph &_graph,
72 const boost::filesystem::path &_trainingfile,
73 const unsigned int _maxiterations,
74 const double _threshold,
75 const unsigned int _best_of_howmany) const
76{
77 // fit potential
78 FunctionModel *model = new CompoundPotential(_graph);
79 ASSERT( model != NULL,
80 "PotentialTrainer::operator() - model is NULL.");
81
82 {
83 CompoundPotential *compound = static_cast<CompoundPotential *>(model);
84 if (compound->begin() == compound->end()) {
85 ELOG(1, "Could not find any suitable potentials for the compound potential.");
86 return false;
87 }
88 }
89
90 /******************** TRAINING ********************/
91 // fit potential
92 FunctionModel::parameters_t bestparams(model->getParameterDimension(), 0.);
93 {
94 // Afterwards we go through all of this type and gather the distance and the energy value
95 TrainingData data(model->getSpecificFilter());
96 data(_homologies.getHomologousGraphs(_graph));
97
98 // print distances and energies if desired for debugging
99 if (!data.getTrainingInputs().empty()) {
100 // print which distance is which
101 size_t counter=1;
102 if (DoLog(3)) {
103 const FunctionModel::arguments_t &inputs = data.getAllArguments()[0];
104 for (FunctionModel::arguments_t::const_iterator iter = inputs.begin();
105 iter != inputs.end(); ++iter) {
106 const argument_t &arg = *iter;
107 LOG(3, "DEBUG: distance " << counter++ << " is between (#"
108 << arg.indices.first << "c" << arg.types.first << ","
109 << arg.indices.second << "c" << arg.types.second << ").");
110 }
111 }
112
113 // print table
114 if (_trainingfile.string().empty()) {
115 LOG(3, "DEBUG: I gathered the following training data:\n" <<
116 _detail::writeDistanceEnergyTable(data.getDistanceEnergyTable()));
117 } else {
118 std::ofstream trainingstream(_trainingfile.string().c_str());
119 if (trainingstream.good()) {
120 LOG(3, "DEBUG: Writing training data to file " <<
121 _trainingfile.string() << ".");
122 trainingstream << _detail::writeDistanceEnergyTable(data.getDistanceEnergyTable());
123 }
124 trainingstream.close();
125 }
126 }
127
128 if ((_threshold < 1.) && (_best_of_howmany))
129 ELOG(2, "threshold parameter always overrules max_runs, both are specified.");
130 // now perform the function approximation by optimizing the model function
131 FunctionApproximation approximator(data, *model, _threshold, _maxiterations);
132 if (model->isBoxConstraint() && approximator.checkParameterDerivatives()) {
133 double l2error = std::numeric_limits<double>::max();
134 // seed with current time
135 srand((unsigned)time(0));
136 unsigned int runs=0;
137 // threshold overrules max_runs
138 const double threshold = _threshold;
139 const unsigned int max_runs = (threshold >= 1.) ? _best_of_howmany : 1;
140 LOG(1, "INFO: Maximum runs is " << max_runs << " and threshold set to " << threshold << ".");
141 do {
142 // generate new random initial parameter values
143 model->setParametersToRandomInitialValues(data);
144 LOG(1, "INFO: Initial parameters of run " << runs << " are "
145 << model->getParameters() << ".");
146 approximator(FunctionApproximation::ParameterDerivative);
147 LOG(1, "INFO: Final parameters of run " << runs << " are "
148 << model->getParameters() << ".");
149 const double new_l2error = data.getL2Error(*model);
150 if (new_l2error < l2error) {
151 // store currently best parameters
152 l2error = new_l2error;
153 bestparams = model->getParameters();
154 LOG(1, "STATUS: New fit from run " << runs
155 << " has better error of " << l2error << ".");
156 }
157 } while (( ++runs < max_runs) || (l2error > threshold));
158 // reset parameters from best fit
159 model->setParameters(bestparams);
160 LOG(1, "INFO: Best parameters with L2 error of "
161 << l2error << " are " << model->getParameters() << ".");
162 } else {
163 return false;
164 }
165
166 // create a map of each fragment with error.
167 HomologyContainer::range_t fragmentrange = _homologies.getHomologousGraphs(_graph);
168 TrainingData::L2ErrorConfigurationIndexMap_t WorseFragmentMap =
169 data.getWorstFragmentMap(*model, fragmentrange);
170 LOG(0, "RESULT: WorstFragmentMap " << WorseFragmentMap << ".");
171
172 }
173 delete model;
174
175 return true;
176}
177
178HomologyGraph PotentialTrainer::getFirstGraphwithSpecifiedElements(
179 const HomologyContainer &homologies,
180 const SerializablePotential::ParticleTypes_t &types)
181{
182 ASSERT( !types.empty(),
183 "getFirstGraphwithSpecifiedElements() - charges is empty?");
184 // create charges
185 Fragment::charges_t charges;
186 charges.resize(types.size());
187 std::transform(types.begin(), types.end(),
188 charges.begin(), boost::lambda::_1);
189 // convert into count map
190 Extractors::elementcounts_t counts_per_charge =
191 Extractors::_detail::getElementCounts(charges);
192 ASSERT( !counts_per_charge.empty(),
193 "getFirstGraphwithSpecifiedElements() - charge counts are empty?");
194 LOG(1, "DEBUG: counts_per_charge is " << counts_per_charge << ".");
195 // we want to check each (unique) key only once
196 HomologyContainer::const_key_iterator olditer = homologies.key_end();
197 for (HomologyContainer::const_key_iterator iter =
198 homologies.key_begin(); iter != homologies.key_end();
199 iter = homologies.getNextKey(iter)) {
200 // if it's the same as the old one, skip it
201 if (olditer == iter)
202 continue;
203 else
204 olditer = iter;
205 // check whether we have the same set of atomic numbers
206 const HomologyGraph::nodes_t &nodes = (*iter).getNodes();
207 Extractors::elementcounts_t nodes_counts_per_charge;
208 for (HomologyGraph::nodes_t::const_iterator nodeiter = nodes.begin();
209 nodeiter != nodes.end(); ++nodeiter) {
210 const Extractors::element_t elem = nodeiter->first.getAtomicNumber();
211 const std::pair<Extractors::elementcounts_t::iterator, bool> inserter =
212 nodes_counts_per_charge.insert( std::make_pair(elem, (Extractors::count_t)nodeiter->second ) );
213 if (!inserter.second)
214 inserter.first->second += (Extractors::count_t)nodeiter->second;
215 }
216 LOG(1, "DEBUG: Node (" << *iter << ")'s counts_per_charge is " << nodes_counts_per_charge << ".");
217 if (counts_per_charge == nodes_counts_per_charge)
218 return *iter;
219 }
220 return HomologyGraph();
221}
222
223SerializablePotential::ParticleTypes_t PotentialTrainer::getNumbersFromElements(
224 const std::vector<const element *> &fragment)
225{
226 SerializablePotential::ParticleTypes_t fragmentnumbers;
227 std::transform(fragment.begin(), fragment.end(), std::back_inserter(fragmentnumbers),
228 boost::bind(&element::getAtomicNumber, _1));
229 return fragmentnumbers;
230}
Note: See TracBrowser for help on using the repository browser.