source: src/Parser/TremoloParser.cpp@ 1a6bda

Action_Thermostats Add_AtomRandomPerturbation Add_FitFragmentPartialChargesAction Add_RotateAroundBondAction Add_SelectAtomByNameAction Added_ParseSaveFragmentResults AddingActions_SaveParseParticleParameters Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_ParticleName_to_Atom Adding_StructOpt_integration_tests AtomFragments Automaking_mpqc_open AutomationFragmentation_failures Candidate_v1.5.4 Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator CombiningParticlePotentialParsing Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_BoundInBox_CenterInBox_MoleculeActions Fix_ChargeSampling_PBC Fix_ChronosMutex Fix_FitPartialCharges Fix_FitPotential_needs_atomicnumbers Fix_ForceAnnealing Fix_IndependentFragmentGrids Fix_ParseParticles Fix_ParseParticles_split_forward_backward_Actions Fix_PopActions Fix_QtFragmentList_sorted_selection Fix_Restrictedkeyset_FragmentMolecule Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns Fix_fitting_potentials Fixes ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion FragmentAction_writes_AtomFragments FragmentMolecule_checks_bonddegrees GeometryObjects Gui_Fixes Gui_displays_atomic_force_velocity ImplicitCharges IndependentFragmentGrids IndependentFragmentGrids_IndividualZeroInstances IndependentFragmentGrids_IntegrationTest IndependentFragmentGrids_Sole_NN_Calculation JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix MoreRobust_FragmentAutomation ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PdbParser_setsAtomName PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks Rewrite_FitPartialCharges RotateToPrincipalAxisSystem_UndoRedo SaturateAtoms_findBestMatching SaturateAtoms_singleDegree StoppableMakroAction Subpackage_CodePatterns Subpackage_JobMarket Subpackage_LinearAlgebra Subpackage_levmar Subpackage_mpqc_open Subpackage_vmg Switchable_LogView ThirdParty_MPQC_rebuilt_buildsystem TrajectoryDependenant_MaxOrder TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps TremoloParser_setsAtomName Ubuntu_1604_changes stable
Last change on this file since 1a6bda was 72d108, checked in by Frederik Heber <heber@…>, 14 years ago

Rewrote TremoloParser::readAtomDataLine() to use boost::tokenizer.

  • The problem was that while the previous construct worked fine, it made it very hard to know what is wrong about a file because it did not operate on single tokens but on lines.
  • Property mode set to 100644
File size: 16.1 KB
Line 
1/*
2 * Project: MoleCuilder
3 * Description: creates and alters molecular systems
4 * Copyright (C) 2010 University of Bonn. All rights reserved.
5 * Please see the LICENSE file or "Copyright notice" in builder.cpp for details.
6 */
7
8/*
9 * TremoloParser.cpp
10 *
11 * Created on: Mar 2, 2010
12 * Author: metzler
13 */
14
15// include config.h
16#ifdef HAVE_CONFIG_H
17#include <config.h>
18#endif
19
20#include "CodePatterns/MemDebug.hpp"
21
22#include "CodePatterns/Assert.hpp"
23#include "CodePatterns/Log.hpp"
24#include "CodePatterns/Verbose.hpp"
25#include "TremoloParser.hpp"
26#include "World.hpp"
27#include "atom.hpp"
28#include "bond.hpp"
29#include "element.hpp"
30#include "molecule.hpp"
31#include "periodentafel.hpp"
32#include "Descriptors/AtomIdDescriptor.hpp"
33#include <map>
34#include <vector>
35
36#include <boost/tokenizer.hpp>
37#include <iostream>
38#include <iomanip>
39
40using namespace std;
41
42/**
43 * Constructor.
44 */
45TremoloParser::TremoloParser() {
46 knownKeys["x"] = TremoloKey::x;
47 knownKeys["u"] = TremoloKey::u;
48 knownKeys["F"] = TremoloKey::F;
49 knownKeys["stress"] = TremoloKey::stress;
50 knownKeys["Id"] = TremoloKey::Id;
51 knownKeys["neighbors"] = TremoloKey::neighbors;
52 knownKeys["imprData"] = TremoloKey::imprData;
53 knownKeys["GroupMeasureTypeNo"] = TremoloKey::GroupMeasureTypeNo;
54 knownKeys["Type"] = TremoloKey::Type;
55 knownKeys["extType"] = TremoloKey::extType;
56 knownKeys["name"] = TremoloKey::name;
57 knownKeys["resName"] = TremoloKey::resName;
58 knownKeys["chainID"] = TremoloKey::chainID;
59 knownKeys["resSeq"] = TremoloKey::resSeq;
60 knownKeys["occupancy"] = TremoloKey::occupancy;
61 knownKeys["tempFactor"] = TremoloKey::tempFactor;
62 knownKeys["segID"] = TremoloKey::segID;
63 knownKeys["Charge"] = TremoloKey::Charge;
64 knownKeys["charge"] = TremoloKey::charge;
65 knownKeys["GrpTypeNo"] = TremoloKey::GrpTypeNo;
66 knownKeys["torsion"] = TremoloKey::torsion;
67
68 // default behavior: use all possible keys on output
69 for (std::map<std::string, TremoloKey::atomDataKey>::iterator iter = knownKeys.begin(); iter != knownKeys.end(); ++iter)
70 usedFields.push_back(iter->first);
71
72 // and noKey afterwards(!) such that it is not used in usedFields
73 knownKeys[" "] = TremoloKey::noKey; // with this we can detect invalid keys
74}
75
76/**
77 * Destructor.
78 */
79TremoloParser::~TremoloParser() {
80 usedFields.clear();
81 additionalAtomData.clear();
82 atomIdMap.clear();
83 knownKeys.clear();
84}
85
86/**
87 * Loads atoms from a tremolo-formatted file.
88 *
89 * \param tremolo file
90 */
91void TremoloParser::load(istream* file) {
92 string line;
93 string::size_type location;
94
95 // reset atomIdMap, for we now get new serials
96 atomIdMap.clear();
97 usedFields.clear();
98
99 molecule *newmol = World::getInstance().createMolecule();
100 newmol->ActiveFlag = true;
101 // TODO: Remove the insertion into molecule when saving does not depend on them anymore. Also, remove molecule.hpp include
102 World::getInstance().getMolecules()->insert(newmol);
103 while (file->good()) {
104 std::getline(*file, line, '\n');
105 if (usedFields.empty()) {
106 location = line.find("ATOMDATA", 0);
107 if (location != string::npos) {
108 parseAtomDataKeysLine(line, location + 8);
109 }
110 }
111 if (line.length() > 0 && line.at(0) != '#') {
112 readAtomDataLine(line, newmol);
113 }
114 }
115
116 processNeighborInformation();
117 adaptImprData();
118 adaptTorsion();
119}
120
121/**
122 * Saves the \a atoms into as a tremolo file.
123 *
124 * \param file where to save the state
125 * \param atoms atoms to store
126 */
127void TremoloParser::save(ostream* file, const std::vector<atom *> &AtomList) {
128 DoLog(0) && (Log() << Verbose(0) << "Saving changes to tremolo." << std::endl);
129
130 vector<atom*>::const_iterator atomIt;
131 vector<string>::iterator it;
132
133 *file << "# ATOMDATA";
134 for (it=usedFields.begin(); it < usedFields.end(); it++) {
135 *file << "\t" << *it;
136 }
137 *file << endl;
138 for (atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
139 saveLine(file, *atomIt);
140 }
141}
142
143/**
144 * Sets the keys for which data should be written to the stream when save is
145 * called.
146 *
147 * \param string of field names with the same syntax as for an ATOMDATA line
148 * but without the prexix "ATOMDATA"
149 */
150void TremoloParser::setFieldsForSave(std::string atomDataLine) {
151 parseAtomDataKeysLine(atomDataLine, 0);
152}
153
154
155/**
156 * Writes one line of tremolo-formatted data to the provided stream.
157 *
158 * \param stream where to write the line to
159 * \param reference to the atom of which information should be written
160 */
161void TremoloParser::saveLine(ostream* file, atom* currentAtom) {
162 vector<string>::iterator it;
163 TremoloKey::atomDataKey currentField;
164
165 for (it = usedFields.begin(); it != usedFields.end(); it++) {
166 currentField = knownKeys[it->substr(0, it->find("="))];
167 switch (currentField) {
168 case TremoloKey::x :
169 // for the moment, assume there are always three dimensions
170 *file << currentAtom->at(0) << "\t";
171 *file << currentAtom->at(1) << "\t";
172 *file << currentAtom->at(2) << "\t";
173 break;
174 case TremoloKey::u :
175 // for the moment, assume there are always three dimensions
176 *file << currentAtom->AtomicVelocity[0] << "\t";
177 *file << currentAtom->AtomicVelocity[1] << "\t";
178 *file << currentAtom->AtomicVelocity[2] << "\t";
179 break;
180 case TremoloKey::Type :
181 *file << currentAtom->getType()->getSymbol() << "\t";
182 break;
183 case TremoloKey::Id :
184 *file << currentAtom->getId()+1 << "\t";
185 break;
186 case TremoloKey::neighbors :
187 writeNeighbors(file, atoi(it->substr(it->find("=") + 1, 1).c_str()), currentAtom);
188 break;
189 case TremoloKey::resSeq :
190 if (additionalAtomData.find(currentAtom->getId()) != additionalAtomData.end()) {
191 *file << additionalAtomData[currentAtom->getId()].get(currentField);
192 } else if (currentAtom->getMolecule() != NULL) {
193 *file << setw(4) << currentAtom->getMolecule()->getId()+1;
194 } else {
195 *file << defaultAdditionalData.get(currentField);
196 }
197 *file << "\t";
198 break;
199 default :
200 if (additionalAtomData.find(currentAtom->getId()) != additionalAtomData.end()) {
201 *file << additionalAtomData[currentAtom->getId()].get(currentField);
202 } else if (additionalAtomData.find(currentAtom->GetTrueFather()->getId()) != additionalAtomData.end()) {
203 *file << additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField);
204 } else {
205 *file << defaultAdditionalData.get(currentField);
206 }
207 *file << "\t";
208 break;
209 }
210 }
211
212 *file << endl;
213}
214
215/**
216 * Writes the neighbor information of one atom to the provided stream.
217 *
218 * \param stream where to write neighbor information to
219 * \param number of neighbors
220 * \param reference to the atom of which to take the neighbor information
221 */
222void TremoloParser::writeNeighbors(ostream* file, int numberOfNeighbors, atom* currentAtom) {
223 BondList::iterator currentBond = currentAtom->ListOfBonds.begin();
224 for (int i = 0; i < numberOfNeighbors; i++) {
225 *file << (currentBond != currentAtom->ListOfBonds.end()
226 ? (*currentBond)->GetOtherAtom(currentAtom)->getId()+1 : 0) << "\t";
227 if (currentBond != currentAtom->ListOfBonds.end())
228 ++currentBond;
229 }
230}
231
232/**
233 * Stores keys from the ATOMDATA line.
234 *
235 * \param line to parse the keys from
236 * \param with which offset the keys begin within the line
237 */
238void TremoloParser::parseAtomDataKeysLine(string line, int offset) {
239 string keyword;
240 stringstream lineStream;
241
242 lineStream << line.substr(offset);
243 usedFields.clear();
244 while (lineStream.good()) {
245 lineStream >> keyword;
246 if (knownKeys[keyword.substr(0, keyword.find("="))] == TremoloKey::noKey) {
247 // TODO: throw exception about unknown key
248 cout << "Unknown key: " << keyword << " is not part of the tremolo format specification." << endl;
249 break;
250 }
251 usedFields.push_back(keyword);
252 }
253 //DoLog(1) && (Log() << Verbose(1) << "INFO: " << usedFields << std::endl);
254}
255
256/**
257 * Reads one data line of a tremolo file and interprets it according to the keys
258 * obtained from the ATOMDATA line.
259 *
260 * \param line to parse as an atom
261 * \param *newmol molecule to add atom to
262 */
263void TremoloParser::readAtomDataLine(string line, molecule *newmol = NULL) {
264 vector<string>::iterator it;
265 stringstream lineStream;
266 atom* newAtom = World::getInstance().createAtom();
267 TremoloAtomInfoContainer *atomInfo = NULL;
268 additionalAtomData[newAtom->getId()] = TremoloAtomInfoContainer(); // fill with default values
269 atomInfo = &additionalAtomData[newAtom->getId()];
270 TremoloKey::atomDataKey currentField;
271 ConvertTo<double> toDouble;
272 ConvertTo<int> toInt;
273
274 // setup tokenizer, splitting up white-spaced entries
275 typedef boost::tokenizer<boost::char_separator<char> >
276 tokenizer;
277 boost::char_separator<char> whitespacesep(" \t");
278 tokenizer tokens(line, whitespacesep);
279 ASSERT(tokens.begin() != tokens.end(),
280 "TremoloParser::readAtomDataLine - empty string, need at least ' '!");
281 tokenizer::iterator tok_iter = tokens.begin();
282 // then associate each token to each file
283 for (it = usedFields.begin(); it < usedFields.end(); it++) {
284 const std::string keyName = it->substr(0, it->find("="));
285 currentField = knownKeys[keyName];
286 const string word = *tok_iter;
287 //DoLog(1) && (Log() << Verbose(1) << "INFO: Parsing key " << keyName << " with remaining data " << word << std::endl);
288 switch (currentField) {
289 case TremoloKey::x :
290 // for the moment, assume there are always three dimensions
291 for (int i=0;i<NDIM;i++) {
292 ASSERT(tok_iter != tokens.end(), "TremoloParser::readAtomDataLine() - no value for x["+toString(i)+"]!");
293 //DoLog(1) && (Log() << Verbose(1) << "INFO: Parsing key " << keyName << " with next token " << *tok_iter << std::endl);
294 newAtom->set(i, toDouble(*tok_iter));
295 tok_iter++;
296 }
297 break;
298 case TremoloKey::u :
299 // for the moment, assume there are always three dimensions
300 for (int i=0;i<NDIM;i++) {
301 ASSERT(tok_iter != tokens.end(), "TremoloParser::readAtomDataLine() - no value for u["+toString(i)+"]!");
302 //DoLog(1) && (Log() << Verbose(1) << "INFO: Parsing key " << keyName << " with next token " << *tok_iter << std::endl);
303 newAtom->AtomicVelocity[i] = toDouble(*tok_iter);
304 tok_iter++;
305 }
306 break;
307 case TremoloKey::Type :
308 char type[3];
309 ASSERT(tok_iter != tokens.end(), "TremoloParser::readAtomDataLine() - no value for "+keyName+"!");
310 //DoLog(1) && (Log() << Verbose(1) << "INFO: Parsing key " << keyName << " with next token " << *tok_iter << std::endl);
311 strncpy(type, (*tok_iter).c_str(), 3);
312 tok_iter++;
313 //type[1]='\0'; // cutoff after first char, correct for ATOM entries
314 newAtom->setType(World::getInstance().getPeriode()->FindElement(type));
315 ASSERT(newAtom->getType(), "Type was not set for this atom");
316 break;
317 case TremoloKey::Id :
318 ASSERT(tok_iter != tokens.end(), "TremoloParser::readAtomDataLine() - no value for "+keyName+"!");
319 //DoLog(1) && (Log() << Verbose(1) << "INFO: Parsing key " << keyName << " with next token " << *tok_iter << std::endl);
320 atomIdMap[toInt(*tok_iter)] = newAtom->getId();
321 tok_iter++;
322 break;
323 case TremoloKey::neighbors :
324 for (int i=0;i<atoi(it->substr(it->find("=") + 1, 1).c_str());i++) {
325 ASSERT(tok_iter != tokens.end(), "TremoloParser::readAtomDataLine() - no value for "+keyName+"!");
326 //DoLog(1) && (Log() << Verbose(1) << "INFO: Parsing key " << keyName << " with next token " << *tok_iter << std::endl);
327 lineStream << *tok_iter << "\t";
328 tok_iter++;
329 }
330 readNeighbors(&lineStream,
331 atoi(it->substr(it->find("=") + 1, 1).c_str()), newAtom->getId());
332 break;
333 default :
334 ASSERT(tok_iter != tokens.end(), "TremoloParser::readAtomDataLine() - no value for "+keyName+"!");
335 //DoLog(1) && (Log() << Verbose(1) << "INFO: Parsing key " << keyName << " with next token " << *tok_iter << std::endl);
336 atomInfo->set(currentField, *tok_iter);
337 tok_iter++;
338 break;
339 }
340 }
341 if (newmol != NULL) {
342 //DoLog(0) && (Log() << Verbose(0) << "New Atom: " << *newAtom << " with type " << newAtom->getType()->getName() << std::endl);
343 newmol->AddAtom(newAtom);
344 }
345}
346
347/**
348 * Reads neighbor information for one atom from the input.
349 *
350 * \param line stream where to read the information from
351 * \param numberOfNeighbors number of neighbors to read
352 * \param atomid world id of the atom the information belongs to
353 */
354void TremoloParser::readNeighbors(stringstream* line, int numberOfNeighbors, int atomId) {
355 int neighborId = 0;
356 for (int i = 0; i < numberOfNeighbors; i++) {
357 *line >> neighborId;
358 // 0 is used to fill empty neighbor positions in the tremolo file.
359 if (neighborId > 0) {
360// DoLog(1) && (Log() << Verbose(1)
361// << "Atom with global id " << atomId
362// << " has neighbour with serial " << neighborId
363// << std::endl);
364 additionalAtomData[atomId].neighbors.push_back(neighborId);
365 }
366 }
367}
368
369/**
370 * Checks whether the provided name is within the list of used fields.
371 *
372 * \param field name to check
373 *
374 * \return true if the field name is used
375 */
376bool TremoloParser::isUsedField(string fieldName) {
377 bool fieldNameExists = false;
378 for (vector<string>::iterator usedField = usedFields.begin(); usedField != usedFields.end(); usedField++) {
379 if (usedField->substr(0, usedField->find("=")) == fieldName)
380 fieldNameExists = true;
381 }
382
383 return fieldNameExists;
384}
385
386
387/**
388 * Adds the collected neighbor information to the atoms in the world. The atoms
389 * are found by their current ID and mapped to the corresponding atoms with the
390 * Id found in the parsed file.
391 */
392void TremoloParser::processNeighborInformation() {
393 if (!isUsedField("neighbors")) {
394 return;
395 }
396
397 for(map<int, TremoloAtomInfoContainer>::iterator currentInfo = additionalAtomData.begin();
398 currentInfo != additionalAtomData.end(); currentInfo++
399 ) {
400 if (!currentInfo->second.neighbors_processed) {
401 for(vector<int>::iterator neighbor = currentInfo->second.neighbors.begin();
402 neighbor != currentInfo->second.neighbors.end(); neighbor++
403 ) {
404// DoLog(1) && (Log() << Verbose(1) << "Creating bond between ("
405// << currentInfo->first
406// << ") and ("
407// << atomIdMap[*neighbor] << "|" << *neighbor << ")" << std::endl);
408 World::getInstance().getAtom(AtomById(currentInfo->first))
409 ->addBond(World::getInstance().getAtom(AtomById(atomIdMap[*neighbor])));
410 }
411 currentInfo->second.neighbors_processed = true;
412 }
413 }
414}
415
416/**
417 * Replaces atom IDs read from the file by the corresponding world IDs. All IDs
418 * IDs of the input string will be replaced; expected separating characters are
419 * "-" and ",".
420 *
421 * \param string in which atom IDs should be adapted
422 *
423 * \return input string with modified atom IDs
424 */
425string TremoloParser::adaptIdDependentDataString(string data) {
426 // there might be no IDs
427 if (data == "-") {
428 return "-";
429 }
430
431 char separator;
432 int id;
433 stringstream line, result;
434 line << data;
435
436 line >> id;
437 result << atomIdMap[id];
438 while (line.good()) {
439 line >> separator >> id;
440 result << separator << atomIdMap[id];
441 }
442
443 return result.str();
444}
445
446/**
447 * Corrects the atom IDs in each imprData entry to the corresponding world IDs
448 * as they might differ from the originally read IDs.
449 */
450void TremoloParser::adaptImprData() {
451 if (!isUsedField("imprData")) {
452 return;
453 }
454
455 for(map<int, TremoloAtomInfoContainer>::iterator currentInfo = additionalAtomData.begin();
456 currentInfo != additionalAtomData.end(); currentInfo++
457 ) {
458 currentInfo->second.imprData = adaptIdDependentDataString(currentInfo->second.imprData);
459 }
460}
461
462/**
463 * Corrects the atom IDs in each torsion entry to the corresponding world IDs
464 * as they might differ from the originally read IDs.
465 */
466void TremoloParser::adaptTorsion() {
467 if (!isUsedField("torsion")) {
468 return;
469 }
470
471 for(map<int, TremoloAtomInfoContainer>::iterator currentInfo = additionalAtomData.begin();
472 currentInfo != additionalAtomData.end(); currentInfo++
473 ) {
474 currentInfo->second.torsion = adaptIdDependentDataString(currentInfo->second.torsion);
475 }
476}
477
Note: See TracBrowser for help on using the repository browser.