source: molecuilder/src/ReMapDBondFileFromXYZ.py@ 01d28a

Last change on this file since 01d28a was 3e20fe, checked in by Christian Neuen <neuen@…>, 17 years ago

Parses a pdb and xyz file and decideds which atoms are in both and which have been ommitted. It remaps the indices of the atoms, so that the indices are without gap and applies this remapping to a dbond file, renaming the bonds and removing the bonds which do not appear any more.

  • Property mode set to 100755
File size: 5.9 KB
Line 
1#!/usr/bin/python
2#
3# Takes two pdb file and a dbond file, matches the coordinates and thus creates a mapping from old ids to new ids.
4
5import sys, random, math, re
6wrerr=sys.stderr.write
7wrout=sys.stdout.write
8
9# check arguments
10if len(sys.argv) < 5:
11 print "Usage: "+sys.argv[0]+" <srcPDBfile> <destXYZfile> <offsetID> <offsetXYZ> <srcDBONDfile> [destDBONDfile]"
12 sys.exit(1)
13
14EPSILON=1e-3
15CUTOFF=2.
16inputsrcPDB = open(sys.argv[1], "r")
17inputdestPDB = open(sys.argv[2], "r")
18inputsrcDBOND = open(sys.argv[5], "r")
19offsetID=int(sys.argv[3])
20offsetXYZ=int(sys.argv[4])
21if len(sys.argv) > 6:
22 output = open(sys.argv[6],"w")
23else:
24 output = open(sys.argv[5]+".new", "w")
25
26# 1. first parse both PDB files into arrays (id, element, xyz) , therewhile scan BoundaryBoxes
27max = [ 0., 0., 0. ]
28min = [ 0., 0., 0. ]
29x = [ 0., 0., 0. ]
30print "Scanning source PDB file"+sys.argv[1]+"."
31srcAtoms = []
32for line in inputsrcPDB:
33 if "#" in line:
34 continue
35 if "END" in line:
36 break
37 if "ATOM" in line:
38 entries = line.split()
39 for n in range(3):
40 x[n] = float(entries[offsetXYZ+n])
41 if x[n] > max[n]:
42 max[n] = x[n]
43 if x[n] < min[n]:
44 min[n] = x[n]
45 srcAtoms.append([int(entries[offsetID]), x[0], x[1], x[2]])
46inputsrcPDB.close()
47print "Scanning destination XYZ file"+sys.argv[2]+"."
48destAtoms = []
49index = 0
50for line in inputdestPDB:
51 entries = line.split()
52 if (len(entries)<1 or (entries[0]!="O" and entries[0]!="Si" and entries[0]!="Ca")):
53 continue
54 for n in range(3):
55 x[n] = float(entries[1+n])
56 if x[n] > max[n]:
57 x[n]-=max[n]
58 if x[n] < min[n]:
59 x[n]+=max[n]
60 destAtoms.append([index, x[0], x[1], x[2]])
61 index+=1
62inputdestPDB.close()
63
64# 2. create Linked Cell with minimum distance box length
65print "Found Box bounds [%f,%f]x[%f,%f]x[%f,%f]." % (min[0],max[0],min[1],max[1],min[2],max[2])
66for i in range(3): # shift by minimum if below zero
67 if min[i] < 0:
68 max[i]-=min[i]
69 else:
70 min[i]=0
71cells_x=int(math.ceil(float(max[0])/CUTOFF))
72cells_y=int(math.ceil(float(max[1])/CUTOFF))
73cells_z=int(math.ceil(float(max[2])/CUTOFF))
74print "Number of cells in each axis direction (%f,%f,%f)." % (cells_x, cells_y, cells_z)
75
76# 3. put each atom into its cell, lists may contain multiple atoms, mark src(0) or dest (1)
77cell=[]
78for i in range(cells_x):
79 cell.append([])
80 for j in range(cells_y):
81 cell[i].append([])
82 for k in range(cells_z):
83 cell[i][j].append([0])
84for i in range(len(srcAtoms)):
85 atom = srcAtoms[i]
86 print atom
87 for n in range(3):
88 x[n] = int(math.floor(float(atom[1+n])/CUTOFF))
89 if cells_x ==x[0]:
90 x[0]-=1
91 if cells_y ==x[1]:
92 x[1]-=1
93 if cells_z ==x[2]:
94 x[2]-=1
95 print x
96 cell[x[0]][x[1]][x[2]][0]+=1
97 cell[x[0]][x[1]][x[2]].append([0,i]) # 0 means src
98 print "Source atom "+str(i)+" goes to cell "+str(x[0])+","+str(x[1])+","+str(x[2])+"."
99for i in range(len(destAtoms)):
100 atom = destAtoms[i]
101 for n in range(3):
102 x[n] = int(math.floor(float(atom[1+n])/CUTOFF))
103 cell[x[0]][x[1]][x[2]][0]+=1
104 cell[x[0]][x[1]][x[2]].append([1,i]) # 1 means dest
105 print "Destination atom "+str(i)+" goes to cell "+str(x[0])+","+str(x[1])+","+str(x[2])+"."
106
107# 4. go through each cell and match (src, dest)-pairs by closest distance, warn if greater than EPSILON
108srcMatches=0
109destMatches=0
110Map = {}
111i=-1
112j=-1
113k=-1
114l=-1
115e=-1
116r=-1
117t=-1
118m=-1
119for i in range(cells_x):
120 for j in range(cells_y):
121 for k in range(cells_z):
122
123 #go through every atom in cell
124 try:
125 for l in range(1, cell[i][j][k][0]+1):
126 if cell[i][j][k][l][0] != 0: # skip if it's not a src atom
127 continue
128 atom1=cell[i][j][k][l][1]
129 print "Current source atom is "+str(srcAtoms[atom1][0])+"."
130 currentPair=[atom1,-1]
131 oldDist=0.
132 # go through cell and all lower neighbours
133 for e in range(i-1,i+2):
134 #if on boarder continue periodic
135 #if e>cells_x-1:
136 # e=e-cells_x
137 if (e < 0) or (e >= cells_x):
138 continue
139 for r in range(j-1,j+2):
140 #if on boarder continue periodic
141 #if r>cells_y-1:
142 # r=r-cells_y
143 if (r < 0) or (r >= cells_y):
144 continue
145 for t in range(k-1,k+2):
146 #if on boarder continue periodic
147 #if t>cells_z-1:
148 # t=t-cells_z
149 if (t < 0) or (t >= cells_z):
150 continue
151 #go through all atoms in cell
152 for m in range(1, cell[e][r][t][0]+1):
153 if cell[e][r][t][m][0] != 1: # skip if it's not a dest atom
154 continue
155 atom2=cell[e][r][t][m][1]
156 print "Current destination atom is "+str(destAtoms[atom2][0])+"."
157 dist=0
158 tmp=0
159 for n in range(3):
160 tmp = srcAtoms[atom1][1+n] - destAtoms[atom2][1+n]
161 dist += tmp*tmp
162 print "Squared distance between the two is "+str(dist)+"."
163 if ((oldDist > dist) or ((currentPair[1] == -1) and (dist<EPSILON))):
164 currentPair[1] = atom2
165 oldDist = dist
166 if currentPair[1] == -1:
167 print"Could not find a suitable partner for srcAtom (%d,%d)!\n" % (srcAtoms[currentPair[0]][0],currentPair[1])
168 Map[ srcAtoms[currentPair[0]][0] ] = currentPair[1]
169 else:
170 print "Found a suitable partner for srcAtom "+str(srcAtoms[currentPair[0]][0])+","+str(destAtoms[currentPair[1]][0])+"."
171 srcMatches+=1
172 destMatches+=1
173 Map[ srcAtoms[currentPair[0]][0] ] = destAtoms[currentPair[1]][0]
174 except IndexError:
175 wrerr("Index Error: (%d,%d,%d)[%d] and (%d,%d,%d)[%d]\n" % (i,j,k,l,e,r,t,m))
176 break
177
178# 5. print the listing
179print "We have "+str(srcMatches)+" matching atoms."
180print "Mapping is:"
181for key in Map:
182 print str(key)+" -> "+str(Map[key])
183#print Map # work also
184
185# 6. use the listing to rewrite the dbond file, store under given filename
186for line in inputsrcDBOND:
187 if "#" in line:
188 continue
189 entries=line.split()
190 flag=0
191 for n in range(len(entries)):
192 if int(entries[n]) == 0 or Map[ int(entries[n]) ] == -1:
193 flag=1
194 if flag==1:
195 continue
196 for n in range(len(entries)):
197 output.write("%d\t" % (Map[ int(entries[n]) ]))
198 output.write("\n")
199output.close()
200print "We have "+str(srcMatches)+" matching atoms."
201# exit
Note: See TracBrowser for help on using the repository browser.