source: src/Parser/PdbParser.cpp@ 8b58ac

Action_Thermostats Add_AtomRandomPerturbation Add_FitFragmentPartialChargesAction Add_RotateAroundBondAction Add_SelectAtomByNameAction Added_ParseSaveFragmentResults AddingActions_SaveParseParticleParameters Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_ParticleName_to_Atom Adding_StructOpt_integration_tests AtomFragments Automaking_mpqc_open AutomationFragmentation_failures Candidate_v1.5.4 Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator CombiningParticlePotentialParsing Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_BoundInBox_CenterInBox_MoleculeActions Fix_ChargeSampling_PBC Fix_ChronosMutex Fix_FitPartialCharges Fix_FitPotential_needs_atomicnumbers Fix_ForceAnnealing Fix_IndependentFragmentGrids Fix_ParseParticles Fix_ParseParticles_split_forward_backward_Actions Fix_PopActions Fix_QtFragmentList_sorted_selection Fix_Restrictedkeyset_FragmentMolecule Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns Fix_fitting_potentials Fixes ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion FragmentAction_writes_AtomFragments FragmentMolecule_checks_bonddegrees GeometryObjects Gui_Fixes Gui_displays_atomic_force_velocity ImplicitCharges IndependentFragmentGrids IndependentFragmentGrids_IndividualZeroInstances IndependentFragmentGrids_IntegrationTest IndependentFragmentGrids_Sole_NN_Calculation JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix MoreRobust_FragmentAutomation ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PdbParser_setsAtomName PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks Rewrite_FitPartialCharges RotateToPrincipalAxisSystem_UndoRedo SaturateAtoms_findBestMatching SaturateAtoms_singleDegree StoppableMakroAction Subpackage_CodePatterns Subpackage_JobMarket Subpackage_LinearAlgebra Subpackage_levmar Subpackage_mpqc_open Subpackage_vmg Switchable_LogView ThirdParty_MPQC_rebuilt_buildsystem TrajectoryDependenant_MaxOrder TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps TremoloParser_setsAtomName Ubuntu_1604_changes stable
Last change on this file since 8b58ac was 94d5ac6, checked in by Frederik Heber <heber@…>, 12 years ago

FIX: As we use GSL internally, we are as of now required to use GPL v2 license.

  • GNU Scientific Library is used at every place in the code, especially the sub-package LinearAlgebra is based on it which in turn is used really everywhere in the remainder of MoleCuilder. Hence, we have to use the GPL license for the whole of MoleCuilder. In effect, GPL's COPYING was present all along and stated the terms of the GPL v2 license.
  • Hence, I added the default GPL v2 disclaimer to every source file and removed the note about a (actually missing) LICENSE file.
  • also, I added a help-redistribute action which again gives the disclaimer of the GPL v2.
  • also, I changed in the disclaimer that is printed at every program start in builder_init.cpp.
  • TEST: Added check on GPL statement present in every module to test CodeChecks project-disclaimer.
  • Property mode set to 100644
File size: 32.4 KB
Line 
1/*
2 * Project: MoleCuilder
3 * Description: creates and alters molecular systems
4 * Copyright (C) 2010-2012 University of Bonn. All rights reserved.
5 *
6 *
7 * This file is part of MoleCuilder.
8 *
9 * MoleCuilder is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * MoleCuilder is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with MoleCuilder. If not, see <http://www.gnu.org/licenses/>.
21 */
22
23/*
24 * PdbParser.cpp
25 *
26 * Created on: Aug 17, 2010
27 * Author: heber
28 */
29
30// include config.h
31#ifdef HAVE_CONFIG_H
32#include <config.h>
33#endif
34
35#include "CodePatterns/MemDebug.hpp"
36
37#include "CodePatterns/Assert.hpp"
38#include "CodePatterns/Log.hpp"
39#include "CodePatterns/toString.hpp"
40#include "CodePatterns/Verbose.hpp"
41
42#include "Atom/atom.hpp"
43#include "Bond/bond.hpp"
44#include "Descriptors/AtomIdDescriptor.hpp"
45#include "Element/element.hpp"
46#include "Element/periodentafel.hpp"
47#include "molecule.hpp"
48#include "MoleculeListClass.hpp"
49#include "Parser/PdbParser.hpp"
50#include "World.hpp"
51#include "WorldTime.hpp"
52
53#include <map>
54#include <vector>
55
56#include <iostream>
57#include <iomanip>
58
59using namespace std;
60
61// declare specialized static variables
62const std::string FormatParserTrait<pdb>::name = "pdb";
63const std::string FormatParserTrait<pdb>::suffix = "pdb";
64const ParserTypes FormatParserTrait<pdb>::type = pdb;
65
66/**
67 * Constructor.
68 */
69FormatParser< pdb >::FormatParser() :
70 FormatParser_common(NULL)
71{
72 knownTokens["ATOM"] = PdbKey::Atom;
73 knownTokens["HETATM"] = PdbKey::Atom;
74 knownTokens["TER"] = PdbKey::Filler;
75 knownTokens["END"] = PdbKey::EndOfTimestep;
76 knownTokens["CONECT"] = PdbKey::Connect;
77 knownTokens["REMARK"] = PdbKey::Remark;
78 knownTokens[""] = PdbKey::EndOfTimestep;
79
80 // argh, why can't just PdbKey::X+(size_t)i
81 PositionEnumMap[0] = PdbKey::X;
82 PositionEnumMap[1] = PdbKey::Y;
83 PositionEnumMap[2] = PdbKey::Z;
84}
85
86/**
87 * Destructor.
88 */
89FormatParser< pdb >::~FormatParser()
90{
91 PdbAtomInfoContainer::clearknownDataKeys();
92 additionalAtomData.clear();
93}
94
95
96/** Parses the initial word of the given \a line and returns the token type.
97 *
98 * @param line line to scan
99 * @return token type
100 */
101enum PdbKey::KnownTokens FormatParser< pdb >::getToken(std::string &line)
102{
103 // look for first space
104 std::string token = line.substr(0,6);
105 const size_t space_location = token.find(' ');
106 const size_t tab_location = token.find('\t');
107 size_t location = space_location < tab_location ? space_location : tab_location;
108 if (location != string::npos) {
109 //LOG(1, "Found space at position " << space_location);
110 token = token.substr(0,space_location);
111 }
112
113 //LOG(1, "Token is " << token);
114 if (knownTokens.count(token) == 0)
115 return PdbKey::NoToken;
116 else
117 return knownTokens[token];
118
119 return PdbKey::NoToken;
120}
121
122/**
123 * Loads atoms from a PDB-formatted file.
124 *
125 * \param PDB file
126 */
127void FormatParser< pdb >::load(istream* file) {
128 string line;
129 size_t linecount = 0;
130 enum PdbKey::KnownTokens token;
131
132 // reset id maps for this file (to correctly parse CONECT entries)
133 resetIdAssociations();
134
135 bool NotEndOfFile = true;
136 molecule *newmol = World::getInstance().createMolecule();
137 newmol->ActiveFlag = true;
138 unsigned int step = 0;
139 // TODO: Remove the insertion into molecule when saving does not depend on them anymore. Also, remove molecule.hpp include
140 World::getInstance().getMolecules()->insert(newmol);
141 while (NotEndOfFile) {
142 bool NotEndOfTimestep = true;
143 while (NotEndOfTimestep && NotEndOfFile) {
144 std::getline(*file, line, '\n');
145 if (!line.empty()) {
146 // extract first token
147 token = getToken(line);
148 switch (token) {
149 case PdbKey::Atom:
150 LOG(3,"INFO: Parsing ATOM entry for time step " << step << ".");
151 readAtomDataLine(step, line, newmol);
152 break;
153 case PdbKey::Remark:
154 LOG(3,"INFO: Parsing REM entry for time step " << step << ".");
155 break;
156 case PdbKey::Connect:
157 LOG(3,"INFO: Parsing CONECT entry for time step " << step << ".");
158 readNeighbors(step, line);
159 break;
160 case PdbKey::Filler:
161 LOG(3,"INFO: Stumbled upon Filler entry for time step " << step << ".");
162 break;
163 case PdbKey::EndOfTimestep:
164 LOG(1,"INFO: Parsing END entry or empty line for time step " << step << ".");
165 NotEndOfTimestep = false;
166 break;
167 default:
168 // TODO: put a throw here
169 ELOG(2, "Unknown token: '" << line << "' for time step " << step << ".");
170 //ASSERT(0, "FormatParser< pdb >::load() - Unknown token in line "+toString(linecount)+": "+line+".");
171 break;
172 }
173 }
174 NotEndOfFile = NotEndOfFile && (file->good());
175 linecount++;
176 }
177 ++step;
178 }
179 LOG(4, "INFO: Listing all newly parsed atoms.");
180 BOOST_FOREACH(atom *_atom, *newmol)
181 LOG(4, "INFO: Atom " << _atom->getName() << " " << *dynamic_cast<AtomInfo *>(_atom) << ".");
182
183 // refresh atom::nr and atom::name
184 newmol->getAtomCount();
185}
186
187/**
188 * Saves the \a atoms into as a PDB file.
189 *
190 * \param file where to save the state
191 * \param atoms atoms to store
192 */
193void FormatParser< pdb >::save(ostream* file, const std::vector<atom *> &AtomList)
194{
195 LOG(0, "Saving changes to pdb.");
196
197 // check for maximum number of time steps
198 size_t max_timesteps = 0;
199 BOOST_FOREACH(atom *_atom, World::getInstance().getAllAtoms()) {
200 LOG(4, "INFO: Atom " << _atom->getName() << " " << *dynamic_cast<AtomInfo *>(_atom) << ".");
201 if (_atom->getTrajectorySize() > max_timesteps)
202 max_timesteps = _atom->getTrajectorySize();
203 }
204 LOG(2,"INFO: Found a maximum of " << max_timesteps << " time steps to store.");
205
206 // re-distribute serials
207 resetIdAssociations();
208 // (new atoms might have been added)
209 int AtomNo = 1; // serial number starts at 1 in pdb
210 for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
211 PdbAtomInfoContainer &atomInfo = getadditionalAtomData(*atomIt);
212 associateLocaltoGlobalId(AtomNo, (*atomIt)->getId());
213 atomInfo.set(PdbKey::serial, toString(AtomNo));
214 ++AtomNo;
215 }
216
217 // store all time steps (always do first step)
218 for (size_t step = 0; (step == 0) || (step < max_timesteps); ++step) {
219 {
220 // add initial remark
221 *file << "REMARK created by molecuilder on ";
222 time_t now = time((time_t *)NULL); // Get the system time and put it into 'now' as 'calender time'
223 // ctime ends in \n\0, we have to cut away the newline
224 std::string time(ctime(&now));
225 size_t pos = time.find('\n');
226 if (pos != 0)
227 *file << time.substr(0,pos);
228 else
229 *file << time;
230 *file << ", time step " << step;
231 *file << endl;
232 }
233
234 {
235 std::map<size_t,size_t> MolIdMap;
236 size_t MolNo = 1; // residue number starts at 1 in pdb
237 for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
238 const molecule *mol = (*atomIt)->getMolecule();
239 if ((mol != NULL) && (MolIdMap.find(mol->getId()) == MolIdMap.end())) {
240 MolIdMap[mol->getId()] = MolNo++;
241 }
242 }
243 const size_t MaxMol = MolNo;
244
245 // have a count per element and per molecule (0 is for all homeless atoms)
246 std::vector<int> **elementNo = new std::vector<int>*[MaxMol];
247 for (size_t i = 0; i < MaxMol; ++i)
248 elementNo[i] = new std::vector<int>(MAX_ELEMENTS,1);
249 char name[MAXSTRINGSIZE];
250 std::string ResidueName;
251
252 // write ATOMs
253 for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
254 PdbAtomInfoContainer &atomInfo = getadditionalAtomData(*atomIt);
255 // gather info about residue
256 const molecule *mol = (*atomIt)->getMolecule();
257 if (mol == NULL) {
258 MolNo = 0;
259 atomInfo.set(PdbKey::resSeq, "0");
260 } else {
261 ASSERT(MolIdMap.find(mol->getId()) != MolIdMap.end(),
262 "FormatParser< pdb >::save() - Mol id "+toString(mol->getId())+" not present despite we set it?!");
263 MolNo = MolIdMap[mol->getId()];
264 atomInfo.set(PdbKey::resSeq, toString(MolIdMap[mol->getId()]));
265 if (atomInfo.get<std::string>(PdbKey::resName) == "-")
266 atomInfo.set(PdbKey::resName, mol->getName().substr(0,3));
267 }
268 // get info about atom
269 const size_t Z = (*atomIt)->getType()->getAtomicNumber();
270 if (atomInfo.get<std::string>(PdbKey::name) == "-") { // if no name set, give it a new name
271 sprintf(name, "%2s%02d",(*atomIt)->getType()->getSymbol().c_str(), (*elementNo[MolNo])[Z]);
272 (*elementNo[MolNo])[Z] = ((*elementNo[MolNo])[Z]+1) % 100; // confine to two digits
273 atomInfo.set(PdbKey::name, name);
274 }
275 // set position
276 for (size_t i=0; i<NDIM;++i) {
277 stringstream position;
278 position << setw(8) << fixed << setprecision(3) << (*atomIt)->getPositionAtStep(step).at(i);
279 atomInfo.set(PositionEnumMap[i], position.str());
280 }
281 // change element and charge if changed
282 if (atomInfo.get<std::string>(PdbKey::element) != (*atomIt)->getType()->getSymbol()) {
283 std::string symbol = (*atomIt)->getType()->getSymbol();
284 if ((symbol[1] >= 'a') && (symbol[1] <= 'z'))
285 symbol[1] = (symbol[1] - 'a') + 'A';
286 atomInfo.set(PdbKey::element, symbol);
287 }
288
289 // finally save the line
290 saveLine(file, atomInfo);
291 }
292 for (size_t i = 0; i < MaxMol; ++i)
293 delete elementNo[i];
294 delete elementNo;
295
296 // write CONECTs
297 for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
298 writeNeighbors(file, 4, *atomIt);
299 }
300 }
301 // END
302 *file << "END" << endl;
303 }
304
305}
306
307/** Add default info, when new atom is added to World.
308 *
309 * @param id of atom
310 */
311void FormatParser< pdb >::AtomInserted(atomId_t id)
312{
313 //LOG(3, "FormatParser< pdb >::AtomInserted() - notified of atom " << id << "'s insertion.");
314 ASSERT(!isPresentadditionalAtomData(id),
315 "FormatParser< pdb >::AtomInserted() - additionalAtomData already present for newly added atom "
316 +toString(id)+".");
317 // don't insert here as this is our check whether we are in the first time step
318 //additionalAtomData.insert( std::make_pair(id, defaultAdditionalData) );
319}
320
321/** Remove additional AtomData info, when atom has been removed from World.
322 *
323 * @param id of atom
324 */
325void FormatParser< pdb >::AtomRemoved(atomId_t id)
326{
327 //LOG(3, "FormatParser< pdb >::AtomRemoved() - notified of atom " << id << "'s removal.");
328 std::map<const atomId_t, PdbAtomInfoContainer>::iterator iter = additionalAtomData.find(id);
329 // as we do not insert AtomData on AtomInserted, we cannot be assured of its presence
330// ASSERT(iter != additionalAtomData.end(),
331// "FormatParser< pdb >::AtomRemoved() - additionalAtomData is not present for atom "
332// +toString(id)+" to remove.");
333 if (iter != additionalAtomData.end()) {
334 additionalAtomData.erase(iter);
335 }
336}
337
338
339/** Checks whether there is an entry for the given atom's \a _id.
340 *
341 * @param _id atom's id we wish to check on
342 * @return true - entry present, false - only for atom's father or no entry
343 */
344bool FormatParser< pdb >::isPresentadditionalAtomData(const atomId_t _id) const
345{
346 std::map<const atomId_t, PdbAtomInfoContainer>::const_iterator iter = additionalAtomData.find(_id);
347 return (iter != additionalAtomData.end());
348}
349
350
351/** Either returns reference to present entry or creates new with default values.
352 *
353 * @param _atom atom whose entry we desire
354 * @return
355 */
356PdbAtomInfoContainer& FormatParser< pdb >::getadditionalAtomData(atom *_atom)
357{
358 if (additionalAtomData.find(_atom->getId()) != additionalAtomData.end()) {
359 } else if (additionalAtomData.find(_atom->father->getId()) != additionalAtomData.end()) {
360 // use info from direct father
361 additionalAtomData[_atom->getId()] = additionalAtomData[_atom->father->getId()];
362 } else if (additionalAtomData.find(_atom->GetTrueFather()->getId()) != additionalAtomData.end()) {
363 // use info from topmost father
364 additionalAtomData[_atom->getId()] = additionalAtomData[_atom->GetTrueFather()->getId()];
365 } else {
366 // create new entry use default values if nothing else is known
367 additionalAtomData[_atom->getId()] = defaultAdditionalData;
368 }
369 return additionalAtomData[_atom->getId()];
370}
371
372/**
373 * Writes one line of PDB-formatted data to the provided stream.
374 *
375 * \param stream where to write the line to
376 * \param *currentAtom the atom of which information should be written
377 * \param AtomNo serial number of atom
378 * \param *name name of atom, i.e. H01
379 * \param ResidueName Name of molecule
380 * \param ResidueNo number of residue
381 */
382void FormatParser< pdb >::saveLine(
383 ostream* file,
384 const PdbAtomInfoContainer &atomInfo)
385{
386 *file << setfill(' ') << left << setw(6)
387 << atomInfo.get<std::string>(PdbKey::token);
388 *file << setfill(' ') << right << setw(5)
389 << atomInfo.get<int>(PdbKey::serial); /* atom serial number */
390 *file << " "; /* char 12 is empty */
391 *file << setfill(' ') << left << setw(4)
392 << atomInfo.get<std::string>(PdbKey::name); /* atom name */
393 *file << setfill(' ') << left << setw(1)
394 << atomInfo.get<std::string>(PdbKey::altLoc); /* alternate location/conformation */
395 *file << setfill(' ') << left << setw(3)
396 << atomInfo.get<std::string>(PdbKey::resName); /* residue name */
397 *file << " "; /* char 21 is empty */
398 *file << setfill(' ') << left << setw(1)
399 << atomInfo.get<std::string>(PdbKey::chainID); /* chain identifier */
400 *file << setfill(' ') << left << setw(4)
401 << atomInfo.get<int>(PdbKey::resSeq); /* residue sequence number */
402 *file << setfill(' ') << left << setw(1)
403 << atomInfo.get<std::string>(PdbKey::iCode); /* iCode */
404 *file << " "; /* char 28-30 are empty */
405 // have the following operate on stringstreams such that format specifiers
406 // only act on these
407 for (size_t i=0;i<NDIM;++i) {
408 stringstream position;
409 position << fixed << setprecision(3) << showpoint
410 << atomInfo.get<double>(PositionEnumMap[i]);
411 *file << setfill(' ') << right << setw(8) << position.str();
412 }
413 {
414 stringstream occupancy;
415 occupancy << fixed << setprecision(2) << showpoint
416 << atomInfo.get<double>(PdbKey::occupancy); /* occupancy */
417 *file << setfill(' ') << right << setw(6) << occupancy.str();
418 }
419 {
420 stringstream tempFactor;
421 tempFactor << fixed << setprecision(2) << showpoint
422 << atomInfo.get<double>(PdbKey::tempFactor); /* temperature factor */
423 *file << setfill(' ') << right << setw(6) << tempFactor.str();
424 }
425 *file << " "; /* char 68-76 are empty */
426 *file << setfill(' ') << right << setw(2) << atomInfo.get<std::string>(PdbKey::element); /* element */
427 *file << setfill(' ') << right << setw(2) << atomInfo.get<int>(PdbKey::charge); /* charge */
428
429 *file << endl;
430}
431
432/**
433 * Writes the neighbor information of one atom to the provided stream.
434 *
435 * Note that ListOfBonds of WorldTime::CurrentTime is used.
436 *
437 * Also, we fill up the CONECT line to extend over 80 chars.
438 *
439 * \param *file where to write neighbor information to
440 * \param MaxnumberOfNeighbors of neighbors
441 * \param *currentAtom to the atom of which to take the neighbor information
442 */
443void FormatParser< pdb >::writeNeighbors(ostream* file, int MaxnumberOfNeighbors, atom* currentAtom) {
444 int MaxNo = MaxnumberOfNeighbors;
445 int charsleft = 80;
446 const BondList & ListOfBonds = currentAtom->getListOfBonds();
447 if (!ListOfBonds.empty()) {
448 for(BondList::const_iterator currentBond = ListOfBonds.begin(); currentBond != ListOfBonds.end(); ++currentBond) {
449 if (MaxNo >= MaxnumberOfNeighbors) {
450 *file << "CONECT";
451 *file << setw(5) << getLocalId(currentAtom->getId());
452 charsleft = 80-6-5;
453 MaxNo = 0;
454 }
455 *file << setw(5) << getLocalId((*currentBond)->GetOtherAtom(currentAtom)->getId());
456 charsleft -= 5;
457 MaxNo++;
458 if (MaxNo == MaxnumberOfNeighbors) {
459 for (;charsleft > 0; charsleft--)
460 *file << ' ';
461 *file << "\n";
462 }
463 }
464 if (MaxNo != MaxnumberOfNeighbors) {
465 for (;charsleft > 0; charsleft--)
466 *file << ' ';
467 *file << "\n";
468 }
469 }
470}
471
472/** Either returns present atom with given id or a newly created one.
473 *
474 * @param id_string
475 * @return
476 */
477atom* FormatParser< pdb >::getAtomToParse(std::string id_string)
478{
479 // get the local ID
480 ConvertTo<int> toInt;
481 const unsigned int AtomID_local = toInt(id_string);
482 LOG(4, "INFO: Local id is "+toString(AtomID_local)+".");
483 // get the atomic ID if present
484 atom* newAtom = NULL;
485 if (getGlobalId(AtomID_local) != -1) {
486 const unsigned int AtomID_global = getGlobalId(AtomID_local);
487 LOG(4, "INFO: Global id present as " << AtomID_global << ".");
488 // check if atom exists
489 newAtom = World::getInstance().getAtom(AtomById(AtomID_global));
490 LOG(5, "INFO: Listing all present atoms with id.");
491 BOOST_FOREACH(atom *_atom, World::getInstance().getAllAtoms())
492 LOG(5, "INFO: " << *_atom << " with id " << _atom->getId());
493 }
494 // if not exists, create
495 if (newAtom == NULL) {
496 newAtom = World::getInstance().createAtom();
497 //const unsigned int AtomID_global = newAtom->getId();
498 LOG(4, "INFO: No association to global id present, creating atom.");
499 } else {
500 LOG(4, "INFO: Existing atom found: " << *newAtom << ".");
501 }
502 return newAtom;
503}
504
505/** read a line starting with key ATOM.
506 *
507 * We check for line's length and parse only up to this value.
508 *
509 * @param atomInfo container to put information in
510 * @param line line containing key ATOM
511 */
512void FormatParser< pdb >::readPdbAtomInfoContainer(PdbAtomInfoContainer &atomInfo, std::string &line) const
513{
514 const size_t length = line.length();
515 if (length < 80)
516 ELOG(2, "FormatParser< pdb >::readPdbAtomInfoContainer() - pdb is mal-formed, containing less than 80 chars!");
517 if (length >= 6) {
518 LOG(4,"INFO: Parsing token from "+line.substr(0,6)+".");
519 atomInfo.set(PdbKey::token, line.substr(0,6));
520 }
521 if (length >= 11) {
522 LOG(4,"INFO: Parsing serial from "+line.substr(6,5)+".");
523 atomInfo.set(PdbKey::serial, line.substr(6,5));
524 ASSERT(atomInfo.get<int>(PdbKey::serial) != 0,
525 "FormatParser< pdb >::readPdbAtomInfoContainer() - serial 0 is invalid (filler id for conect entries).");
526 }
527
528 if (length >= 16) {
529 LOG(4,"INFO: Parsing name from "+line.substr(12,4)+".");
530 atomInfo.set(PdbKey::name, line.substr(12,4));
531 }
532 if (length >= 17) {
533 LOG(4,"INFO: Parsing altLoc from "+line.substr(16,1)+".");
534 atomInfo.set(PdbKey::altLoc, line.substr(16,1));
535 }
536 if (length >= 20) {
537 LOG(4,"INFO: Parsing resName from "+line.substr(17,3)+".");
538 atomInfo.set(PdbKey::resName, line.substr(17,3));
539 }
540 if (length >= 22) {
541 LOG(4,"INFO: Parsing chainID from "+line.substr(21,1)+".");
542 atomInfo.set(PdbKey::chainID, line.substr(21,1));
543 }
544 if (length >= 26) {
545 LOG(4,"INFO: Parsing resSeq from "+line.substr(22,4)+".");
546 atomInfo.set(PdbKey::resSeq, line.substr(22,4));
547 }
548 if (length >= 27) {
549 LOG(4,"INFO: Parsing iCode from "+line.substr(26,1)+".");
550 atomInfo.set(PdbKey::iCode, line.substr(26,1));
551 }
552
553 if (length >= 60) {
554 LOG(4,"INFO: Parsing occupancy from "+line.substr(54,6)+".");
555 atomInfo.set(PdbKey::occupancy, line.substr(54,6));
556 }
557 if (length >= 66) {
558 LOG(4,"INFO: Parsing tempFactor from "+line.substr(60,6)+".");
559 atomInfo.set(PdbKey::tempFactor, line.substr(60,6));
560 }
561 if (length >= 80) {
562 LOG(4,"INFO: Parsing charge from "+line.substr(78,2)+".");
563 atomInfo.set(PdbKey::charge, line.substr(78,2));
564 }
565 if (length >= 78) {
566 LOG(4,"INFO: Parsing element from "+line.substr(76,2)+".");
567 atomInfo.set(PdbKey::element, line.substr(76,2));
568 } else {
569 LOG(4,"INFO: Trying to parse alternative element from name "+line.substr(12,4)+".");
570 atomInfo.set(PdbKey::element, line.substr(12,4));
571 }
572}
573
574/** Parse an ATOM line from a PDB file.
575 *
576 * Reads one data line of a pdstatus file and interprets it according to the
577 * specifications of the PDB 3.2 format: http://www.wwpdb.org/docs.html
578 *
579 * A new atom is created and filled with available information, non-
580 * standard information is placed in additionalAtomData at the atom's id.
581 *
582 * \param _step time step to use
583 * \param line to parse as an atom
584 * \param newmol molecule to add parsed atoms to
585 */
586void FormatParser< pdb >::readAtomDataLine(const unsigned int _step, std::string &line, molecule *newmol = NULL) {
587 vector<string>::iterator it;
588
589 atom* newAtom = getAtomToParse(line.substr(6,5));
590 LOG(3,"INFO: Parsing END entry or empty line.");
591 bool FirstTimestep = isPresentadditionalAtomData(newAtom->getId()) ? false : true;
592 ASSERT((FirstTimestep && (_step == 0)) || (!FirstTimestep && (_step !=0)),
593 "FormatParser< pdb >::readAtomDataLine() - additionalAtomData present though atom is newly parsed.");
594 if (FirstTimestep) {
595 LOG(3,"INFO: Parsing new atom "+toString(*newAtom)+" "+toString(newAtom->getId())+".");
596 } else {
597 LOG(3,"INFO: Parsing present atom "+toString(*newAtom)+".");
598 }
599 PdbAtomInfoContainer &atomInfo = getadditionalAtomData(newAtom);
600 LOG(4,"INFO: Information in container is "+toString(atomInfo)+".");
601
602 string word;
603 ConvertTo<size_t> toSize_t;
604
605 // check whether serial exists, if so, assign next available
606
607// LOG(2, "Split line:"
608// << line.substr(6,5) << "|"
609// << line.substr(12,4) << "|"
610// << line.substr(16,1) << "|"
611// << line.substr(17,3) << "|"
612// << line.substr(21,1) << "|"
613// << line.substr(22,4) << "|"
614// << line.substr(26,1) << "|"
615// << line.substr(30,8) << "|"
616// << line.substr(38,8) << "|"
617// << line.substr(46,8) << "|"
618// << line.substr(54,6) << "|"
619// << line.substr(60,6) << "|"
620// << line.substr(76,2) << "|"
621// << line.substr(78,2));
622
623 if (FirstTimestep) {
624 // first time step
625 // then fill info container
626 readPdbAtomInfoContainer(atomInfo, line);
627 // associate local with global id
628 associateLocaltoGlobalId(toSize_t(atomInfo.get<std::string>(PdbKey::serial)), newAtom->getId());
629 // set position
630 Vector tempVector;
631 LOG(4,"INFO: Parsing position from ("
632 +line.substr(30,8)+","
633 +line.substr(38,8)+","
634 +line.substr(46,8)+").");
635 PdbAtomInfoContainer::ScanKey(tempVector[0], line.substr(30,8));
636 PdbAtomInfoContainer::ScanKey(tempVector[1], line.substr(38,8));
637 PdbAtomInfoContainer::ScanKey(tempVector[2], line.substr(46,8));
638 newAtom->setPosition(tempVector);
639 // set element
640 std::string value = atomInfo.get<std::string>(PdbKey::element);
641 // make second character lower case if not
642 if ((value[1] >= 'A') && (value[1] <= 'Z'))
643 value[1] = (value[1] - 'A') + 'a';
644 const element *elem = World::getInstance().getPeriode()
645 ->FindElement(value);
646 ASSERT(elem != NULL,
647 "FormatParser< pdb >::readAtomDataLine() - element "+atomInfo.get<std::string>(PdbKey::element)+" is unknown!");
648 newAtom->setType(elem);
649
650 if (newmol != NULL)
651 newmol->AddAtom(newAtom);
652 } else {
653 // not first time step
654 // then parse into different container
655 PdbAtomInfoContainer consistencyInfo;
656 readPdbAtomInfoContainer(consistencyInfo, line);
657 // then check additional info for consistency
658 ASSERT(atomInfo.get<std::string>(PdbKey::token) == consistencyInfo.get<std::string>(PdbKey::token),
659 "FormatParser< pdb >::readAtomDataLine() - difference in token on multiple time step for atom with id "
660 +atomInfo.get<std::string>(PdbKey::serial)+"!");
661 ASSERT(atomInfo.get<std::string>(PdbKey::name) == consistencyInfo.get<std::string>(PdbKey::name),
662 "FormatParser< pdb >::readAtomDataLine() - difference in name on multiple time step for atom with id "
663 +atomInfo.get<std::string>(PdbKey::serial)+":"
664 +atomInfo.get<std::string>(PdbKey::name)+"!="
665 +consistencyInfo.get<std::string>(PdbKey::name)+".");
666 ASSERT(atomInfo.get<std::string>(PdbKey::altLoc) == consistencyInfo.get<std::string>(PdbKey::altLoc),
667 "FormatParser< pdb >::readAtomDataLine() - difference in altLoc on multiple time step for atom with id "
668 +atomInfo.get<std::string>(PdbKey::serial)+"!");
669 ASSERT(atomInfo.get<std::string>(PdbKey::resName) == consistencyInfo.get<std::string>(PdbKey::resName),
670 "FormatParser< pdb >::readAtomDataLine() - difference in resName on multiple time step for atom with id "
671 +atomInfo.get<std::string>(PdbKey::serial)+"!");
672 ASSERT(atomInfo.get<std::string>(PdbKey::chainID) == consistencyInfo.get<std::string>(PdbKey::chainID),
673 "FormatParser< pdb >::readAtomDataLine() - difference in chainID on multiple time step for atom with id "
674 +atomInfo.get<std::string>(PdbKey::serial)+"!");
675 ASSERT(atomInfo.get<std::string>(PdbKey::resSeq) == consistencyInfo.get<std::string>(PdbKey::resSeq),
676 "FormatParser< pdb >::readAtomDataLine() - difference in resSeq on multiple time step for atom with id "
677 +atomInfo.get<std::string>(PdbKey::serial)+"!");
678 ASSERT(atomInfo.get<std::string>(PdbKey::iCode) == consistencyInfo.get<std::string>(PdbKey::iCode),
679 "FormatParser< pdb >::readAtomDataLine() - difference in iCode on multiple time step for atom with id "
680 +atomInfo.get<std::string>(PdbKey::serial)+"!");
681 ASSERT(atomInfo.get<std::string>(PdbKey::occupancy) == consistencyInfo.get<std::string>(PdbKey::occupancy),
682 "FormatParser< pdb >::readAtomDataLine() - difference in occupancy on multiple time step for atom with id "
683 +atomInfo.get<std::string>(PdbKey::serial)+"!");
684 ASSERT(atomInfo.get<std::string>(PdbKey::tempFactor) == consistencyInfo.get<std::string>(PdbKey::tempFactor),
685 "FormatParser< pdb >::readAtomDataLine() - difference in tempFactor on multiple time step for atom with id "
686 +atomInfo.get<std::string>(PdbKey::serial)+"!");
687 ASSERT(atomInfo.get<std::string>(PdbKey::charge) == consistencyInfo.get<std::string>(PdbKey::charge),
688 "FormatParser< pdb >::readAtomDataLine() - difference in charge on multiple time step for atom with id "
689 +atomInfo.get<std::string>(PdbKey::serial)+"!");
690 ASSERT(atomInfo.get<std::string>(PdbKey::element) == consistencyInfo.get<std::string>(PdbKey::element),
691 "FormatParser< pdb >::readAtomDataLine() - difference in element on multiple time step for atom with id "
692 +atomInfo.get<std::string>(PdbKey::serial)+"!");
693 // and parse in trajectory
694 Vector tempVector;
695 LOG(4,"INFO: Parsing trajectory position from ("
696 +line.substr(30,8)+","
697 +line.substr(38,8)+","
698 +line.substr(46,8)+").");
699 PdbAtomInfoContainer::ScanKey(tempVector[0], line.substr(30,8));
700 PdbAtomInfoContainer::ScanKey(tempVector[1], line.substr(38,8));
701 PdbAtomInfoContainer::ScanKey(tempVector[2], line.substr(46,8));
702 LOG(4,"INFO: Adding trajectory point to time step "+toString(_step)+".");
703 // and set position at new time step
704 newAtom->setPositionAtStep(_step, tempVector);
705 }
706
707
708// printAtomInfo(newAtom);
709}
710
711/** Prints all PDB-specific information known about an atom.
712 *
713 */
714void FormatParser< pdb >::printAtomInfo(const atom * const newAtom) const
715{
716 const PdbAtomInfoContainer &atomInfo = additionalAtomData.at(newAtom->getId()); // operator[] const does not exist
717
718 LOG(1, "We know about atom " << newAtom->getId() << ":");
719 LOG(1, "\ttoken is " << atomInfo.get<std::string>(PdbKey::token));
720 LOG(1, "\tserial is " << atomInfo.get<int>(PdbKey::serial));
721 LOG(1, "\tname is " << atomInfo.get<std::string>(PdbKey::name));
722 LOG(1, "\taltLoc is " << atomInfo.get<std::string>(PdbKey::altLoc));
723 LOG(1, "\tresName is " << atomInfo.get<std::string>(PdbKey::resName));
724 LOG(1, "\tchainID is " << atomInfo.get<std::string>(PdbKey::chainID));
725 LOG(1, "\tresSeq is " << atomInfo.get<int>(PdbKey::resSeq));
726 LOG(1, "\tiCode is " << atomInfo.get<std::string>(PdbKey::iCode));
727 LOG(1, "\tX is " << atomInfo.get<double>(PdbKey::X));
728 LOG(1, "\tY is " << atomInfo.get<double>(PdbKey::Y));
729 LOG(1, "\tZ is " << atomInfo.get<double>(PdbKey::Z));
730 LOG(1, "\toccupancy is " << atomInfo.get<double>(PdbKey::occupancy));
731 LOG(1, "\ttempFactor is " << atomInfo.get<double>(PdbKey::tempFactor));
732 LOG(1, "\telement is '" << *(newAtom->getType()) << "'");
733 LOG(1, "\tcharge is " << atomInfo.get<int>(PdbKey::charge));
734}
735
736/**
737 * Reads neighbor information for one atom from the input.
738 *
739 * \param _step time step to use
740 * \param line to parse as an atom
741 */
742void FormatParser< pdb >::readNeighbors(const unsigned int _step, std::string &line)
743{
744 const size_t length = line.length();
745 std::list<size_t> ListOfNeighbors;
746 ConvertTo<size_t> toSize_t;
747
748 // obtain neighbours
749 // show split line for debugging
750 string output;
751 ASSERT(length >=16,
752 "FormatParser< pdb >::readNeighbors() - CONECT entry has not enough entries: "+line+"!");
753// output = "Split line:|";
754// output += line.substr(6,5) + "|";
755 const size_t id = toSize_t(line.substr(6,5));
756 for (size_t index = 11; index <= 26; index+=5) {
757 if (index+5 <= length) {
758 output += line.substr(index,5) + "|";
759 // search for digits
760 int otherid = -1;
761 PdbAtomInfoContainer::ScanKey(otherid, line.substr(index,5));
762 if (otherid > 0)
763 ListOfNeighbors.push_back(otherid);
764 else
765 ELOG(3, "FormatParser< pdb >::readNeighbors() - discarding CONECT entry with id 0.");
766 } else {
767 break;
768 }
769 }
770 LOG(4, output);
771
772 // add neighbours
773 atom *_atom = World::getInstance().getAtom(AtomById(getGlobalId(id)));
774 LOG(2, "STATUS: Atom " << _atom->getId() << " gets " << ListOfNeighbors.size() << " more neighbours.");
775 for (std::list<size_t>::const_iterator iter = ListOfNeighbors.begin();
776 iter != ListOfNeighbors.end();
777 ++iter) {
778 atom * const _Otheratom = World::getInstance().getAtom(AtomById(getGlobalId(*iter)));
779 LOG(3, "INFO: Adding Bond (" << *_atom << "," << *_Otheratom << ")");
780 _atom->addBond(_step, _Otheratom);
781 }
782}
783
784bool FormatParser< pdb >::operator==(const FormatParser< pdb >& b) const
785{
786 bool status = true;
787 World::AtomComposite atoms = World::getInstance().getAllAtoms();
788 for (World::AtomComposite::const_iterator iter = atoms.begin(); iter != atoms.end(); ++iter) {
789 if ((additionalAtomData.find((*iter)->getId()) != additionalAtomData.end())
790 && (b.additionalAtomData.find((*iter)->getId()) != b.additionalAtomData.end())) {
791 const PdbAtomInfoContainer &atomInfo = additionalAtomData.at((*iter)->getId());
792 const PdbAtomInfoContainer &OtheratomInfo = b.additionalAtomData.at((*iter)->getId());
793
794 status = status && (atomInfo.get<std::string>(PdbKey::serial) == OtheratomInfo.get<std::string>(PdbKey::serial));
795 if (!status) ELOG(1, "Mismatch in serials!");
796 status = status && (atomInfo.get<std::string>(PdbKey::name) == OtheratomInfo.get<std::string>(PdbKey::name));
797 if (!status) ELOG(1, "Mismatch in names!");
798 status = status && (atomInfo.get<std::string>(PdbKey::altLoc) == OtheratomInfo.get<std::string>(PdbKey::altLoc));
799 if (!status) ELOG(1, "Mismatch in altLocs!");
800 status = status && (atomInfo.get<std::string>(PdbKey::resName) == OtheratomInfo.get<std::string>(PdbKey::resName));
801 if (!status) ELOG(1, "Mismatch in resNames!");
802 status = status && (atomInfo.get<std::string>(PdbKey::chainID) == OtheratomInfo.get<std::string>(PdbKey::chainID));
803 if (!status) ELOG(1, "Mismatch in chainIDs!");
804 status = status && (atomInfo.get<std::string>(PdbKey::resSeq) == OtheratomInfo.get<std::string>(PdbKey::resSeq));
805 if (!status) ELOG(1, "Mismatch in resSeqs!");
806 status = status && (atomInfo.get<std::string>(PdbKey::iCode) == OtheratomInfo.get<std::string>(PdbKey::iCode));
807 if (!status) ELOG(1, "Mismatch in iCodes!");
808 status = status && (atomInfo.get<std::string>(PdbKey::occupancy) == OtheratomInfo.get<std::string>(PdbKey::occupancy));
809 if (!status) ELOG(1, "Mismatch in occupancies!");
810 status = status && (atomInfo.get<std::string>(PdbKey::tempFactor) == OtheratomInfo.get<std::string>(PdbKey::tempFactor));
811 if (!status) ELOG(1, "Mismatch in tempFactors!");
812 status = status && (atomInfo.get<std::string>(PdbKey::charge) == OtheratomInfo.get<std::string>(PdbKey::charge));
813 if (!status) ELOG(1, "Mismatch in charges!");
814 }
815 }
816
817 return status;
818}
819
Note: See TracBrowser for help on using the repository browser.