1 | /*
2 | * Project: MoleCuilder
3 | * Description: creates and alters molecular systems
4 | * Copyright (C) 2010-2012 University of Bonn. All rights reserved.
5 | * Please see the LICENSE file or "Copyright notice" in builder.cpp for details.
6 | */
7 |
8 | /*
9 | * PdbParser.cpp
10 | *
11 | * Created on: Aug 17, 2010
12 | * Author: heber
13 | */
14 |
15 | // include config.h
16 | #ifdef HAVE_CONFIG_H
17 | #include <config.h>
18 | #endif
19 |
20 | #include "CodePatterns/MemDebug.hpp"
21 |
22 | #include "CodePatterns/Assert.hpp"
23 | #include "CodePatterns/Log.hpp"
24 | #include "CodePatterns/toString.hpp"
25 | #include "CodePatterns/Verbose.hpp"
26 |
27 | #include "Atom/atom.hpp"
28 | #include "Bond/bond.hpp"
29 | #include "Descriptors/AtomIdDescriptor.hpp"
30 | #include "Element/element.hpp"
31 | #include "Element/periodentafel.hpp"
32 | #include "molecule.hpp"
33 | #include "MoleculeListClass.hpp"
34 | #include "Parser/PdbParser.hpp"
35 | #include "World.hpp"
36 | #include "WorldTime.hpp"
37 |
38 | #include <map>
39 | #include <vector>
40 |
41 | #include <iostream>
42 | #include <iomanip>
43 |
44 | using namespace std;
45 |
// Definitions of the static members of the FormatParserTrait specialization for
// the pdb format: its name, its suffix and its ParserTypes enumerator.
const std::string FormatParserTrait<pdb>::name = "pdb";
const std::string FormatParserTrait<pdb>::suffix = "pdb";
const ParserTypes FormatParserTrait<pdb>::type = pdb;
50 |
51 | /**
52 | * Constructor.
53 | */
54 | FormatParser< pdb >::FormatParser() :
55 | FormatParser_common(NULL)
56 | {
57 | knownTokens["ATOM"] = PdbKey::Atom;
58 | knownTokens["HETATM"] = PdbKey::Atom;
59 | knownTokens["TER"] = PdbKey::Filler;
60 | knownTokens["END"] = PdbKey::EndOfTimestep;
61 | knownTokens["CONECT"] = PdbKey::Connect;
62 | knownTokens["REMARK"] = PdbKey::Remark;
63 | knownTokens[""] = PdbKey::EndOfTimestep;
64 |
65 | // argh, why can't just PdbKey::X+(size_t)i
66 | PositionEnumMap[0] = PdbKey::X;
67 | PositionEnumMap[1] = PdbKey::Y;
68 | PositionEnumMap[2] = PdbKey::Z;
69 | }
70 |
/**
 * Destructor.
 *
 * Calls PdbAtomInfoContainer::clearknownDataKeys() and removes all entries from
 * the additionalAtomData map of this parser.
 */
FormatParser< pdb >::~FormatParser()
{
// invoked on the class PdbAtomInfoContainer, not on one of the stored containers
PdbAtomInfoContainer::clearknownDataKeys();
// drop every atom id -> PdbAtomInfoContainer entry held by this parser
additionalAtomData.clear();
}
79 |
80 |
81 | /** Parses the initial word of the given \a line and returns the token type.
82 | *
83 | * @param line line to scan
84 | * @return token type
85 | */
86 | enum PdbKey::KnownTokens FormatParser< pdb >::getToken(string &line)
87 | {
88 | // look for first space
89 | std::string token = line.substr(0,6);
90 | const size_t space_location = token.find(' ');
91 | const size_t tab_location = token.find('\t');
92 | size_t location = space_location < tab_location ? space_location : tab_location;
93 | if (location != string::npos) {
94 | //LOG(1, "Found space at position " << space_location);
95 | token = token.substr(0,space_location);
96 | }
97 |
98 | //LOG(1, "Token is " << token);
99 | if (knownTokens.count(token) == 0)
100 | return PdbKey::NoToken;
101 | else
102 | return knownTokens[token];
103 |
104 | return PdbKey::NoToken;
105 | }
106 |
107 | /**
108 | * Loads atoms from a PDB-formatted file.
109 | *
110 | * \param PDB file
111 | */
112 | void FormatParser< pdb >::load(istream* file) {
113 | string line;
114 | size_t linecount = 0;
115 | enum PdbKey::KnownTokens token;
116 |
117 | // reset id maps for this file (to correctly parse CONECT entries)
118 | resetIdAssociations();
119 |
120 | bool NotEndOfFile = true;
121 | molecule *newmol = World::getInstance().createMolecule();
122 | newmol->ActiveFlag = true;
123 | unsigned int step = 0;
124 | // TODO: Remove the insertion into molecule when saving does not depend on them anymore. Also, remove molecule.hpp include
125 | World::getInstance().getMolecules()->insert(newmol);
126 | while (NotEndOfFile) {
127 | bool NotEndOfTimestep = true;
128 | while (NotEndOfTimestep && NotEndOfFile) {
129 | std::getline(*file, line, '\n');
130 | if (!line.empty()) {
131 | // extract first token
132 | token = getToken(line);
133 | switch (token) {
134 | case PdbKey::Atom:
135 | LOG(3,"INFO: Parsing ATOM entry for time step " << step << ".");
136 | readAtomDataLine(step, line, newmol);
137 | break;
138 | case PdbKey::Remark:
139 | LOG(3,"INFO: Parsing REM entry for time step " << step << ".");
140 | break;
141 | case PdbKey::Connect:
142 | LOG(3,"INFO: Parsing CONECT entry for time step " << step << ".");
143 | readNeighbors(step, line);
144 | break;
145 | case PdbKey::Filler:
146 | LOG(3,"INFO: Stumbled upon Filler entry for time step " << step << ".");
147 | break;
148 | case PdbKey::EndOfTimestep:
149 | LOG(1,"INFO: Parsing END entry or empty line for time step " << step << ".");
150 | NotEndOfTimestep = false;
151 | break;
152 | default:
153 | // TODO: put a throw here
154 | ELOG(2, "Unknown token: '" << line << "' for time step " << step << ".");
155 | //ASSERT(0, "FormatParser< pdb >::load() - Unknown token in line "+toString(linecount)+": "+line+".");
156 | break;
157 | }
158 | }
159 | NotEndOfFile = NotEndOfFile && (file->good());
160 | linecount++;
161 | }
162 | ++step;
163 | }
164 | LOG(4, "INFO: Listing all newly parsed atoms.");
165 | BOOST_FOREACH(atom *_atom, *newmol)
166 | LOG(4, "INFO: Atom " << _atom->getName() << " " << *dynamic_cast<AtomInfo *>(_atom) << ".");
167 |
168 | // refresh atom::nr and atom::name
169 | newmol->getAtomCount();
170 | }
171 |
172 | /**
173 | * Saves the \a atoms into as a PDB file.
174 | *
175 | * \param file where to save the state
176 | * \param atoms atoms to store
177 | */
178 | void FormatParser< pdb >::save(ostream* file, const std::vector<atom *> &AtomList)
179 | {
180 | LOG(0, "Saving changes to pdb.");
181 |
182 | // check for maximum number of time steps
183 | size_t max_timesteps = 0;
184 | BOOST_FOREACH(atom *_atom, World::getInstance().getAllAtoms()) {
185 | LOG(4, "INFO: Atom " << _atom->getName() << " " << *dynamic_cast<AtomInfo *>(_atom) << ".");
186 | if (_atom->getTrajectorySize() > max_timesteps)
187 | max_timesteps = _atom->getTrajectorySize();
188 | }
189 | LOG(2,"INFO: Found a maximum of " << max_timesteps << " time steps to store.");
190 |
191 | // re-distribute serials
192 | resetIdAssociations();
193 | // (new atoms might have been added)
194 | int AtomNo = 1; // serial number starts at 1 in pdb
195 | for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
196 | PdbAtomInfoContainer &atomInfo = getadditionalAtomData(*atomIt);
197 | associateLocaltoGlobalId(AtomNo, (*atomIt)->getId());
198 | atomInfo.set(PdbKey::serial, toString(AtomNo));
199 | ++AtomNo;
200 | }
201 |
202 | // store all time steps (always do first step)
203 | for (size_t step = 0; (step == 0) || (step < max_timesteps); ++step) {
204 | {
205 | // add initial remark
206 | *file << "REMARK created by molecuilder on ";
207 | time_t now = time((time_t *)NULL); // Get the system time and put it into 'now' as 'calender time'
208 | // ctime ends in \n\0, we have to cut away the newline
209 | std::string time(ctime(&now));
210 | size_t pos = time.find('\n');
211 | if (pos != 0)
212 | *file << time.substr(0,pos);
213 | else
214 | *file << time;
215 | *file << ", time step " << step;
216 | *file << endl;
217 | }
218 |
219 | {
220 | std::map<size_t,size_t> MolIdMap;
221 | size_t MolNo = 1; // residue number starts at 1 in pdb
222 | for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
223 | const molecule *mol = (*atomIt)->getMolecule();
224 | if ((mol != NULL) && (MolIdMap.find(mol->getId()) == MolIdMap.end())) {
225 | MolIdMap[mol->getId()] = MolNo++;
226 | }
227 | }
228 | const size_t MaxMol = MolNo;
229 |
230 | // have a count per element and per molecule (0 is for all homeless atoms)
231 | std::vector<int> **elementNo = new std::vector<int>*[MaxMol];
232 | for (size_t i = 0; i < MaxMol; ++i)
233 | elementNo[i] = new std::vector<int>(MAX_ELEMENTS,1);
234 | char name[MAXSTRINGSIZE];
235 | std::string ResidueName;
236 |
237 | // write ATOMs
238 | for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
239 | PdbAtomInfoContainer &atomInfo = getadditionalAtomData(*atomIt);
240 | // gather info about residue
241 | const molecule *mol = (*atomIt)->getMolecule();
242 | if (mol == NULL) {
243 | MolNo = 0;
244 | atomInfo.set(PdbKey::resSeq, "0");
245 | } else {
246 | ASSERT(MolIdMap.find(mol->getId()) != MolIdMap.end(),
247 | "FormatParser< pdb >::save() - Mol id "+toString(mol->getId())+" not present despite we set it?!");
248 | MolNo = MolIdMap[mol->getId()];
249 | atomInfo.set(PdbKey::resSeq, toString(MolIdMap[mol->getId()]));
250 | if (atomInfo.get<std::string>(PdbKey::resName) == "-")
251 | atomInfo.set(PdbKey::resName, mol->getName().substr(0,3));
252 | }
253 | // get info about atom
254 | const size_t Z = (*atomIt)->getType()->getAtomicNumber();
255 | if (atomInfo.get<std::string>(PdbKey::name) == "-") { // if no name set, give it a new name
256 | sprintf(name, "%2s%02d",(*atomIt)->getType()->getSymbol().c_str(), (*elementNo[MolNo])[Z]);
257 | (*elementNo[MolNo])[Z] = ((*elementNo[MolNo])[Z]+1) % 100; // confine to two digits
258 | atomInfo.set(PdbKey::name, name);
259 | }
260 | // set position
261 | for (size_t i=0; i<NDIM;++i) {
262 | stringstream position;
263 | position << setw(8) << fixed << setprecision(3) << (*atomIt)->getPositionAtStep(step).at(i);
264 | atomInfo.set(PositionEnumMap[i], position.str());
265 | }
266 | // change element and charge if changed
267 | if (atomInfo.get<std::string>(PdbKey::element) != (*atomIt)->getType()->getSymbol()) {
268 | std::string symbol = (*atomIt)->getType()->getSymbol();
269 | if ((symbol[1] >= 'a') && (symbol[1] <= 'z'))
270 | symbol[1] = (symbol[1] - 'a') + 'A';
271 | atomInfo.set(PdbKey::element, symbol);
272 | }
273 |
274 | // finally save the line
275 | saveLine(file, atomInfo);
276 | }
277 | for (size_t i = 0; i < MaxMol; ++i)
278 | delete elementNo[i];
279 | delete elementNo;
280 |
281 | // write CONECTs
282 | for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
283 | writeNeighbors(file, 4, *atomIt);
284 | }
285 | }
286 | // END
287 | *file << "END" << endl;
288 | }
289 |
290 | }
291 |
/** Called when a new atom is added to World.
 *
 * No entry is created in additionalAtomData here. The function only asserts
 * that no entry is present yet for the given \a id, because the absence of an
 * entry is used elsewhere as the sign that an atom is parsed in the first time
 * step.
 *
 * @param id of atom
 */
void FormatParser< pdb >::AtomInserted(atomId_t id)
{
//LOG(3, "FormatParser< pdb >::AtomInserted() - notified of atom " << id << "'s insertion.");
// a freshly inserted atom must not have additional data attached already
ASSERT(!isPresentadditionalAtomData(id),
"FormatParser< pdb >::AtomInserted() - additionalAtomData already present for newly added atom "
+toString(id)+".");
// don't insert here as this is our check whether we are in the first time step
//additionalAtomData.insert( std::make_pair(id, defaultAdditionalData) );
}
305 |
306 | /** Remove additional AtomData info, when atom has been removed from World.
307 | *
308 | * @param id of atom
309 | */
310 | void FormatParser< pdb >::AtomRemoved(atomId_t id)
311 | {
312 | //LOG(3, "FormatParser< pdb >::AtomRemoved() - notified of atom " << id << "'s removal.");
313 | std::map<const atomId_t, PdbAtomInfoContainer>::iterator iter = additionalAtomData.find(id);
314 | // as we do not insert AtomData on AtomInserted, we cannot be assured of its presence
315 | // ASSERT(iter != additionalAtomData.end(),
316 | // "FormatParser< pdb >::AtomRemoved() - additionalAtomData is not present for atom "
317 | // +toString(id)+" to remove.");
318 | if (iter != additionalAtomData.end()) {
319 | additionalAtomData.erase(iter);
320 | }
321 | }
322 |
323 |
324 | /** Checks whether there is an entry for the given atom's \a _id.
325 | *
326 | * @param _id atom's id we wish to check on
327 | * @return true - entry present, false - only for atom's father or no entry
328 | */
329 | bool FormatParser< pdb >::isPresentadditionalAtomData(const atomId_t _id) const
330 | {
331 | std::map<const atomId_t, PdbAtomInfoContainer>::const_iterator iter = additionalAtomData.find(_id);
332 | return (iter != additionalAtomData.end());
333 | }
334 |
335 |
336 | /** Either returns reference to present entry or creates new with default values.
337 | *
338 | * @param _atom atom whose entry we desire
339 | * @return
340 | */
341 | PdbAtomInfoContainer& FormatParser< pdb >::getadditionalAtomData(atom *_atom)
342 | {
343 | if (additionalAtomData.find(_atom->getId()) != additionalAtomData.end()) {
344 | } else if (additionalAtomData.find(_atom->father->getId()) != additionalAtomData.end()) {
345 | // use info from direct father
346 | additionalAtomData[_atom->getId()] = additionalAtomData[_atom->father->getId()];
347 | } else if (additionalAtomData.find(_atom->GetTrueFather()->getId()) != additionalAtomData.end()) {
348 | // use info from topmost father
349 | additionalAtomData[_atom->getId()] = additionalAtomData[_atom->GetTrueFather()->getId()];
350 | } else {
351 | // create new entry use default values if nothing else is known
352 | additionalAtomData[_atom->getId()] = defaultAdditionalData;
353 | }
354 | return additionalAtomData[_atom->getId()];
355 | }
356 |
357 | /**
358 | * Writes one line of PDB-formatted data to the provided stream.
359 | *
360 | * \param stream where to write the line to
361 | * \param *currentAtom the atom of which information should be written
362 | * \param AtomNo serial number of atom
363 | * \param *name name of atom, i.e. H01
364 | * \param ResidueName Name of molecule
365 | * \param ResidueNo number of residue
366 | */
367 | void FormatParser< pdb >::saveLine(
368 | ostream* file,
369 | const PdbAtomInfoContainer &atomInfo)
370 | {
371 | *file << setfill(' ') << left << setw(6)
372 | << atomInfo.get<std::string>(PdbKey::token);
373 | *file << setfill(' ') << right << setw(5)
374 | << atomInfo.get<int>(PdbKey::serial); /* atom serial number */
375 | *file << " "; /* char 12 is empty */
376 | *file << setfill(' ') << left << setw(4)
377 | << atomInfo.get<std::string>(PdbKey::name); /* atom name */
378 | *file << setfill(' ') << left << setw(1)
379 | << atomInfo.get<std::string>(PdbKey::altLoc); /* alternate location/conformation */
380 | *file << setfill(' ') << left << setw(3)
381 | << atomInfo.get<std::string>(PdbKey::resName); /* residue name */
382 | *file << " "; /* char 21 is empty */
383 | *file << setfill(' ') << left << setw(1)
384 | << atomInfo.get<std::string>(PdbKey::chainID); /* chain identifier */
385 | *file << setfill(' ') << left << setw(4)
386 | << atomInfo.get<int>(PdbKey::resSeq); /* residue sequence number */
387 | *file << setfill(' ') << left << setw(1)
388 | << atomInfo.get<std::string>(PdbKey::iCode); /* iCode */
389 | *file << " "; /* char 28-30 are empty */
390 | // have the following operate on stringstreams such that format specifiers
391 | // only act on these
392 | for (size_t i=0;i<NDIM;++i) {
393 | stringstream position;
394 | position << fixed << setprecision(3) << showpoint
395 | << atomInfo.get<double>(PositionEnumMap[i]);
396 | *file << setfill(' ') << right << setw(8) << position.str();
397 | }
398 | {
399 | stringstream occupancy;
400 | occupancy << fixed << setprecision(2) << showpoint
401 | << atomInfo.get<double>(PdbKey::occupancy); /* occupancy */
402 | *file << setfill(' ') << right << setw(6) << occupancy.str();
403 | }
404 | {
405 | stringstream tempFactor;
406 | tempFactor << fixed << setprecision(2) << showpoint
407 | << atomInfo.get<double>(PdbKey::tempFactor); /* temperature factor */
408 | *file << setfill(' ') << right << setw(6) << tempFactor.str();
409 | }
410 | *file << " "; /* char 68-76 are empty */
411 | *file << setfill(' ') << right << setw(2) << atomInfo.get<std::string>(PdbKey::element); /* element */
412 | *file << setfill(' ') << right << setw(2) << atomInfo.get<int>(PdbKey::charge); /* charge */
413 |
414 | *file << endl;
415 | }
416 |
417 | /**
418 | * Writes the neighbor information of one atom to the provided stream.
419 | *
420 | * Note that ListOfBonds of WorldTime::CurrentTime is used.
421 | *
422 | * Also, we fill up the CONECT line to extend over 80 chars.
423 | *
424 | * \param *file where to write neighbor information to
425 | * \param MaxnumberOfNeighbors of neighbors
426 | * \param *currentAtom to the atom of which to take the neighbor information
427 | */
428 | void FormatParser< pdb >::writeNeighbors(ostream* file, int MaxnumberOfNeighbors, atom* currentAtom) {
429 | int MaxNo = MaxnumberOfNeighbors;
430 | int charsleft = 80;
431 | const BondList & ListOfBonds = currentAtom->getListOfBonds();
432 | if (!ListOfBonds.empty()) {
433 | for(BondList::const_iterator currentBond = ListOfBonds.begin(); currentBond != ListOfBonds.end(); ++currentBond) {
434 | if (MaxNo >= MaxnumberOfNeighbors) {
435 | *file << "CONECT";
436 | *file << setw(5) << getLocalId(currentAtom->getId());
437 | charsleft = 80-6-5;
438 | MaxNo = 0;
439 | }
440 | *file << setw(5) << getLocalId((*currentBond)->GetOtherAtom(currentAtom)->getId());
441 | charsleft -= 5;
442 | MaxNo++;
443 | if (MaxNo == MaxnumberOfNeighbors) {
444 | for (;charsleft > 0; charsleft--)
445 | *file << ' ';
446 | *file << "\n";
447 | }
448 | }
449 | if (MaxNo != MaxnumberOfNeighbors) {
450 | for (;charsleft > 0; charsleft--)
451 | *file << ' ';
452 | *file << "\n";
453 | }
454 | }
455 | }
456 |
457 | /** Either returns present atom with given id or a newly created one.
458 | *
459 | * @param id_string
460 | * @return
461 | */
462 | atom* FormatParser< pdb >::getAtomToParse(std::string id_string)
463 | {
464 | // get the local ID
465 | ConvertTo<int> toInt;
466 | const unsigned int AtomID_local = toInt(id_string);
467 | LOG(4, "INFO: Local id is "+toString(AtomID_local)+".");
468 | // get the atomic ID if present
469 | atom* newAtom = NULL;
470 | if (getGlobalId(AtomID_local) != -1) {
471 | const unsigned int AtomID_global = getGlobalId(AtomID_local);
472 | LOG(4, "INFO: Global id present as " << AtomID_global << ".");
473 | // check if atom exists
474 | newAtom = World::getInstance().getAtom(AtomById(AtomID_global));
475 | LOG(5, "INFO: Listing all present atoms with id.");
476 | BOOST_FOREACH(atom *_atom, World::getInstance().getAllAtoms())
477 | LOG(5, "INFO: " << *_atom << " with id " << _atom->getId());
478 | }
479 | // if not exists, create
480 | if (newAtom == NULL) {
481 | newAtom = World::getInstance().createAtom();
482 | //const unsigned int AtomID_global = newAtom->getId();
483 | LOG(4, "INFO: No association to global id present, creating atom.");
484 | } else {
485 | LOG(4, "INFO: Existing atom found: " << *newAtom << ".");
486 | }
487 | return newAtom;
488 | }
489 |
490 | /** read a line starting with key ATOM.
491 | *
492 | * We check for line's length and parse only up to this value.
493 | *
494 | * @param atomInfo container to put information in
495 | * @param line line containing key ATOM
496 | */
497 | void FormatParser< pdb >::readPdbAtomInfoContainer(PdbAtomInfoContainer &atomInfo, std::string &line) const
498 | {
499 | const size_t length = line.length();
500 | if (length < 80)
501 | ELOG(2, "FormatParser< pdb >::readPdbAtomInfoContainer() - pdb is mal-formed, containing less than 80 chars!");
502 | if (length >= 6) {
503 | LOG(4,"INFO: Parsing token from "+line.substr(0,6)+".");
504 | atomInfo.set(PdbKey::token, line.substr(0,6));
505 | }
506 | if (length >= 11) {
507 | LOG(4,"INFO: Parsing serial from "+line.substr(6,5)+".");
508 | atomInfo.set(PdbKey::serial, line.substr(6,5));
509 | ASSERT(atomInfo.get<int>(PdbKey::serial) != 0,
510 | "FormatParser< pdb >::readPdbAtomInfoContainer() - serial 0 is invalid (filler id for conect entries).");
511 | }
512 |
513 | if (length >= 16) {
514 | LOG(4,"INFO: Parsing name from "+line.substr(12,4)+".");
515 | atomInfo.set(PdbKey::name, line.substr(12,4));
516 | }
517 | if (length >= 17) {
518 | LOG(4,"INFO: Parsing altLoc from "+line.substr(16,1)+".");
519 | atomInfo.set(PdbKey::altLoc, line.substr(16,1));
520 | }
521 | if (length >= 20) {
522 | LOG(4,"INFO: Parsing resName from "+line.substr(17,3)+".");
523 | atomInfo.set(PdbKey::resName, line.substr(17,3));
524 | }
525 | if (length >= 22) {
526 | LOG(4,"INFO: Parsing chainID from "+line.substr(21,1)+".");
527 | atomInfo.set(PdbKey::chainID, line.substr(21,1));
528 | }
529 | if (length >= 26) {
530 | LOG(4,"INFO: Parsing resSeq from "+line.substr(22,4)+".");
531 | atomInfo.set(PdbKey::resSeq, line.substr(22,4));
532 | }
533 | if (length >= 27) {
534 | LOG(4,"INFO: Parsing iCode from "+line.substr(26,1)+".");
535 | atomInfo.set(PdbKey::iCode, line.substr(26,1));
536 | }
537 |
538 | if (length >= 60) {
539 | LOG(4,"INFO: Parsing occupancy from "+line.substr(54,6)+".");
540 | atomInfo.set(PdbKey::occupancy, line.substr(54,6));
541 | }
542 | if (length >= 66) {
543 | LOG(4,"INFO: Parsing tempFactor from "+line.substr(60,6)+".");
544 | atomInfo.set(PdbKey::tempFactor, line.substr(60,6));
545 | }
546 | if (length >= 80) {
547 | LOG(4,"INFO: Parsing charge from "+line.substr(78,2)+".");
548 | atomInfo.set(PdbKey::charge, line.substr(78,2));
549 | }
550 | if (length >= 78) {
551 | LOG(4,"INFO: Parsing element from "+line.substr(76,2)+".");
552 | atomInfo.set(PdbKey::element, line.substr(76,2));
553 | } else {
554 | LOG(4,"INFO: Trying to parse alternative element from name "+line.substr(12,4)+".");
555 | atomInfo.set(PdbKey::element, line.substr(12,4));
556 | }
557 | }
558 |
559 | /** Parse an ATOM line from a PDB file.
560 | *
561 | * Reads one data line of a pdstatus file and interprets it according to the
562 | * specifications of the PDB 3.2 format: http://www.wwpdb.org/docs.html
563 | *
564 | * A new atom is created and filled with available information, non-
565 | * standard information is placed in additionalAtomData at the atom's id.
566 | *
567 | * \param _step time step to use
568 | * \param line to parse as an atom
569 | * \param newmol molecule to add parsed atoms to
570 | */
571 | void FormatParser< pdb >::readAtomDataLine(const unsigned int _step, std::string &line, molecule *newmol = NULL) {
572 | vector<string>::iterator it;
573 |
574 | atom* newAtom = getAtomToParse(line.substr(6,5));
575 | LOG(3,"INFO: Parsing END entry or empty line.");
576 | bool FirstTimestep = isPresentadditionalAtomData(newAtom->getId()) ? false : true;
577 | ASSERT((FirstTimestep && (_step == 0)) || (!FirstTimestep && (_step !=0)),
578 | "FormatParser< pdb >::readAtomDataLine() - additionalAtomData present though atom is newly parsed.");
579 | if (FirstTimestep) {
580 | LOG(3,"INFO: Parsing new atom "+toString(*newAtom)+" "+toString(newAtom->getId())+".");
581 | } else {
582 | LOG(3,"INFO: Parsing present atom "+toString(*newAtom)+".");
583 | }
584 | PdbAtomInfoContainer &atomInfo = getadditionalAtomData(newAtom);
585 | LOG(4,"INFO: Information in container is "+toString(atomInfo)+".");
586 |
587 | string word;
588 | ConvertTo<size_t> toSize_t;
589 |
590 | // check whether serial exists, if so, assign next available
591 |
592 | // LOG(2, "Split line:"
593 | // << line.substr(6,5) << "|"
594 | // << line.substr(12,4) << "|"
595 | // << line.substr(16,1) << "|"
596 | // << line.substr(17,3) << "|"
597 | // << line.substr(21,1) << "|"
598 | // << line.substr(22,4) << "|"
599 | // << line.substr(26,1) << "|"
600 | // << line.substr(30,8) << "|"
601 | // << line.substr(38,8) << "|"
602 | // << line.substr(46,8) << "|"
603 | // << line.substr(54,6) << "|"
604 | // << line.substr(60,6) << "|"
605 | // << line.substr(76,2) << "|"
606 | // << line.substr(78,2));
607 |
608 | if (FirstTimestep) {
609 | // first time step
610 | // then fill info container
611 | readPdbAtomInfoContainer(atomInfo, line);
612 | // associate local with global id
613 | associateLocaltoGlobalId(toSize_t(atomInfo.get<std::string>(PdbKey::serial)), newAtom->getId());
614 | // set position
615 | Vector tempVector;
616 | LOG(4,"INFO: Parsing position from ("
617 | +line.substr(30,8)+","
618 | +line.substr(38,8)+","
619 | +line.substr(46,8)+").");
620 | PdbAtomInfoContainer::ScanKey(tempVector[0], line.substr(30,8));
621 | PdbAtomInfoContainer::ScanKey(tempVector[1], line.substr(38,8));
622 | PdbAtomInfoContainer::ScanKey(tempVector[2], line.substr(46,8));
623 | newAtom->setPosition(tempVector);
624 | // set element
625 | std::string value = atomInfo.get<std::string>(PdbKey::element);
626 | // make second character lower case if not
627 | if ((value[1] >= 'A') && (value[1] <= 'Z'))
628 | value[1] = (value[1] - 'A') + 'a';
629 | const element *elem = World::getInstance().getPeriode()
630 | ->FindElement(value);
631 | ASSERT(elem != NULL,
632 | "FormatParser< pdb >::readAtomDataLine() - element "+atomInfo.get<std::string>(PdbKey::element)+" is unknown!");
633 | newAtom->setType(elem);
634 |
635 | if (newmol != NULL)
636 | newmol->AddAtom(newAtom);
637 | } else {
638 | // not first time step
639 | // then parse into different container
640 | PdbAtomInfoContainer consistencyInfo;
641 | readPdbAtomInfoContainer(consistencyInfo, line);
642 | // then check additional info for consistency
643 | ASSERT(atomInfo.get<std::string>(PdbKey::token) == consistencyInfo.get<std::string>(PdbKey::token),
644 | "FormatParser< pdb >::readAtomDataLine() - difference in token on multiple time step for atom with id "
645 | +atomInfo.get<std::string>(PdbKey::serial)+"!");
646 | ASSERT(atomInfo.get<std::string>(PdbKey::name) == consistencyInfo.get<std::string>(PdbKey::name),
647 | "FormatParser< pdb >::readAtomDataLine() - difference in name on multiple time step for atom with id "
648 | +atomInfo.get<std::string>(PdbKey::serial)+":"
649 | +atomInfo.get<std::string>(PdbKey::name)+"!="
650 | +consistencyInfo.get<std::string>(PdbKey::name)+".");
651 | ASSERT(atomInfo.get<std::string>(PdbKey::altLoc) == consistencyInfo.get<std::string>(PdbKey::altLoc),
652 | "FormatParser< pdb >::readAtomDataLine() - difference in altLoc on multiple time step for atom with id "
653 | +atomInfo.get<std::string>(PdbKey::serial)+"!");
654 | ASSERT(atomInfo.get<std::string>(PdbKey::resName) == consistencyInfo.get<std::string>(PdbKey::resName),
655 | "FormatParser< pdb >::readAtomDataLine() - difference in resName on multiple time step for atom with id "
656 | +atomInfo.get<std::string>(PdbKey::serial)+"!");
657 | ASSERT(atomInfo.get<std::string>(PdbKey::chainID) == consistencyInfo.get<std::string>(PdbKey::chainID),
658 | "FormatParser< pdb >::readAtomDataLine() - difference in chainID on multiple time step for atom with id "
659 | +atomInfo.get<std::string>(PdbKey::serial)+"!");
660 | ASSERT(atomInfo.get<std::string>(PdbKey::resSeq) == consistencyInfo.get<std::string>(PdbKey::resSeq),
661 | "FormatParser< pdb >::readAtomDataLine() - difference in resSeq on multiple time step for atom with id "
662 | +atomInfo.get<std::string>(PdbKey::serial)+"!");
663 | ASSERT(atomInfo.get<std::string>(PdbKey::iCode) == consistencyInfo.get<std::string>(PdbKey::iCode),
664 | "FormatParser< pdb >::readAtomDataLine() - difference in iCode on multiple time step for atom with id "
665 | +atomInfo.get<std::string>(PdbKey::serial)+"!");
666 | ASSERT(atomInfo.get<std::string>(PdbKey::occupancy) == consistencyInfo.get<std::string>(PdbKey::occupancy),
667 | "FormatParser< pdb >::readAtomDataLine() - difference in occupancy on multiple time step for atom with id "
668 | +atomInfo.get<std::string>(PdbKey::serial)+"!");
669 | ASSERT(atomInfo.get<std::string>(PdbKey::tempFactor) == consistencyInfo.get<std::string>(PdbKey::tempFactor),
670 | "FormatParser< pdb >::readAtomDataLine() - difference in tempFactor on multiple time step for atom with id "
671 | +atomInfo.get<std::string>(PdbKey::serial)+"!");
672 | ASSERT(atomInfo.get<std::string>(PdbKey::charge) == consistencyInfo.get<std::string>(PdbKey::charge),
673 | "FormatParser< pdb >::readAtomDataLine() - difference in charge on multiple time step for atom with id "
674 | +atomInfo.get<std::string>(PdbKey::serial)+"!");
675 | ASSERT(atomInfo.get<std::string>(PdbKey::element) == consistencyInfo.get<std::string>(PdbKey::element),
676 | "FormatParser< pdb >::readAtomDataLine() - difference in element on multiple time step for atom with id "
677 | +atomInfo.get<std::string>(PdbKey::serial)+"!");
678 | // and parse in trajectory
679 | Vector tempVector;
680 | LOG(4,"INFO: Parsing trajectory position from ("
681 | +line.substr(30,8)+","
682 | +line.substr(38,8)+","
683 | +line.substr(46,8)+").");
684 | PdbAtomInfoContainer::ScanKey(tempVector[0], line.substr(30,8));
685 | PdbAtomInfoContainer::ScanKey(tempVector[1], line.substr(38,8));
686 | PdbAtomInfoContainer::ScanKey(tempVector[2], line.substr(46,8));
687 | LOG(4,"INFO: Adding trajectory point to time step "+toString(_step)+".");
688 | // and set position at new time step
689 | newAtom->setPositionAtStep(_step, tempVector);
690 | }
691 |
692 |
693 | // printAtomInfo(newAtom);
694 | }
695 |
696 | /** Prints all PDB-specific information known about an atom.
697 | *
698 | */
699 | void FormatParser< pdb >::printAtomInfo(const atom * const newAtom) const
700 | {
701 | const PdbAtomInfoContainer &atomInfo = additionalAtomData.at(newAtom->getId()); // operator[] const does not exist
702 |
703 | LOG(1, "We know about atom " << newAtom->getId() << ":");
704 | LOG(1, "\ttoken is " << atomInfo.get<std::string>(PdbKey::token));
705 | LOG(1, "\tserial is " << atomInfo.get<int>(PdbKey::serial));
706 | LOG(1, "\tname is " << atomInfo.get<std::string>(PdbKey::name));
707 | LOG(1, "\taltLoc is " << atomInfo.get<std::string>(PdbKey::altLoc));
708 | LOG(1, "\tresName is " << atomInfo.get<std::string>(PdbKey::resName));
709 | LOG(1, "\tchainID is " << atomInfo.get<std::string>(PdbKey::chainID));
710 | LOG(1, "\tresSeq is " << atomInfo.get<int>(PdbKey::resSeq));
711 | LOG(1, "\tiCode is " << atomInfo.get<std::string>(PdbKey::iCode));
712 | LOG(1, "\tX is " << atomInfo.get<double>(PdbKey::X));
713 | LOG(1, "\tY is " << atomInfo.get<double>(PdbKey::Y));
714 | LOG(1, "\tZ is " << atomInfo.get<double>(PdbKey::Z));
715 | LOG(1, "\toccupancy is " << atomInfo.get<double>(PdbKey::occupancy));
716 | LOG(1, "\ttempFactor is " << atomInfo.get<double>(PdbKey::tempFactor));
717 | LOG(1, "\telement is '" << *(newAtom->getType()) << "'");
718 | LOG(1, "\tcharge is " << atomInfo.get<int>(PdbKey::charge));
719 | }
720 |
721 | /**
722 | * Reads neighbor information for one atom from the input.
723 | *
724 | * \param _step time step to use
725 | * \param line to parse as an atom
726 | */
727 | void FormatParser< pdb >::readNeighbors(const unsigned int _step, std::string &line)
728 | {
729 | const size_t length = line.length();
730 | std::list<size_t> ListOfNeighbors;
731 | ConvertTo<size_t> toSize_t;
732 |
733 | // obtain neighbours
734 | // show split line for debugging
735 | string output;
736 | ASSERT(length >=16,
737 | "FormatParser< pdb >::readNeighbors() - CONECT entry has not enough entries: "+line+"!");
738 | // output = "Split line:|";
739 | // output += line.substr(6,5) + "|";
740 | const size_t id = toSize_t(line.substr(6,5));
741 | for (size_t index = 11; index <= 26; index+=5) {
742 | if (index+5 <= length) {
743 | output += line.substr(index,5) + "|";
744 | // search for digits
745 | int otherid = -1;
746 | PdbAtomInfoContainer::ScanKey(otherid, line.substr(index,5));
747 | if (otherid > 0)
748 | ListOfNeighbors.push_back(otherid);
749 | else
750 | ELOG(3, "FormatParser< pdb >::readNeighbors() - discarding CONECT entry with id 0.");
751 | } else {
752 | break;
753 | }
754 | }
755 | LOG(4, output);
756 |
757 | // add neighbours
758 | atom *_atom = World::getInstance().getAtom(AtomById(getGlobalId(id)));
759 | LOG(2, "STATUS: Atom " << _atom->getId() << " gets " << ListOfNeighbors.size() << " more neighbours.");
760 | for (std::list<size_t>::const_iterator iter = ListOfNeighbors.begin();
761 | iter != ListOfNeighbors.end();
762 | ++iter) {
763 | atom * const _Otheratom = World::getInstance().getAtom(AtomById(getGlobalId(*iter)));
764 | LOG(3, "INFO: Adding Bond (" << *_atom << "," << *_Otheratom << ")");
765 | _atom->addBond(_step, _Otheratom);
766 | }
767 | }
768 |
769 | bool FormatParser< pdb >::operator==(const FormatParser< pdb >& b) const
770 | {
771 | bool status = true;
772 | World::AtomComposite atoms = World::getInstance().getAllAtoms();
773 | for (World::AtomComposite::const_iterator iter = atoms.begin(); iter != atoms.end(); ++iter) {
774 | if ((additionalAtomData.find((*iter)->getId()) != additionalAtomData.end())
775 | && (b.additionalAtomData.find((*iter)->getId()) != b.additionalAtomData.end())) {
776 | const PdbAtomInfoContainer &atomInfo = additionalAtomData.at((*iter)->getId());
777 | const PdbAtomInfoContainer &OtheratomInfo = b.additionalAtomData.at((*iter)->getId());
778 |
779 | status = status && (atomInfo.get<std::string>(PdbKey::serial) == OtheratomInfo.get<std::string>(PdbKey::serial));
780 | if (!status) ELOG(1, "Mismatch in serials!");
781 | status = status && (atomInfo.get<std::string>(PdbKey::name) == OtheratomInfo.get<std::string>(PdbKey::name));
782 | if (!status) ELOG(1, "Mismatch in names!");
783 | status = status && (atomInfo.get<std::string>(PdbKey::altLoc) == OtheratomInfo.get<std::string>(PdbKey::altLoc));
784 | if (!status) ELOG(1, "Mismatch in altLocs!");
785 | status = status && (atomInfo.get<std::string>(PdbKey::resName) == OtheratomInfo.get<std::string>(PdbKey::resName));
786 | if (!status) ELOG(1, "Mismatch in resNames!");
787 | status = status && (atomInfo.get<std::string>(PdbKey::chainID) == OtheratomInfo.get<std::string>(PdbKey::chainID));
788 | if (!status) ELOG(1, "Mismatch in chainIDs!");
789 | status = status && (atomInfo.get<std::string>(PdbKey::resSeq) == OtheratomInfo.get<std::string>(PdbKey::resSeq));
790 | if (!status) ELOG(1, "Mismatch in resSeqs!");
791 | status = status && (atomInfo.get<std::string>(PdbKey::iCode) == OtheratomInfo.get<std::string>(PdbKey::iCode));
792 | if (!status) ELOG(1, "Mismatch in iCodes!");
793 | status = status && (atomInfo.get<std::string>(PdbKey::occupancy) == OtheratomInfo.get<std::string>(PdbKey::occupancy));
794 | if (!status) ELOG(1, "Mismatch in occupancies!");
795 | status = status && (atomInfo.get<std::string>(PdbKey::tempFactor) == OtheratomInfo.get<std::string>(PdbKey::tempFactor));
796 | if (!status) ELOG(1, "Mismatch in tempFactors!");
797 | status = status && (atomInfo.get<std::string>(PdbKey::charge) == OtheratomInfo.get<std::string>(PdbKey::charge));
798 | if (!status) ELOG(1, "Mismatch in charges!");
799 | }
800 | }
801 |
802 | return status;
803 | }
804 |