source: util/src/ReMapDBondFile.py.in@ e1a46d

Last change on this file since e1a46d was e1a46d, checked in by Frederik Heber <heber@…>, 16 years ago

All of my python script put into ESPACK and adapted with @PYTHON@ and so on.

  • Property mode set to 100755
File size: 7.3 KB
Line 
1#!@PYTHON@
2#
3# Takes two pdb file and a dbond file, matches the coordinates and thus creates a mapping from old ids to new ids.
4
5import sys, random, math, re
6wrerr=sys.stderr.write
7wrout=sys.stdout.write
8
9# check arguments
10if len(sys.argv) < 5:
11 print "Usage: "+sys.argv[0]+" <src{PDB/XYZ}file> <destXYZfile> <offsetID> <offsetXYZ> <srcDBONDfile> [destDBONDfile]"
12 sys.exit(1)
13
14EPSILON=1e-3
15CUTOFF=2.
16inputsrc = open(sys.argv[1], "r")
17inputdestPDB = open(sys.argv[2], "r")
18inputsrcDBOND = open(sys.argv[5], "r")
19offsetID=int(sys.argv[3])
20offsetXYZ=int(sys.argv[4])
21entries = sys.argv[1].split(".")
22suffix = entries[-1]
23if len(sys.argv) > 6:
24 output = open(sys.argv[6],"w")
25else:
26 output = open(sys.argv[5]+".new", "w")
27
28# 1. first parse both PDB files into arrays (id, element, xyz) , therewhile scan BoundaryBoxes
29max = [ 0., 0., 0. ]
30min = [ 0., 0., 0. ]
31x = [ 0., 0., 0. ]
32nr=0
33srcAtoms = []
34#if "xyz" in suffix:
35# print "Scanning source XYZ file "+sys.argv[1]+"."
36# for line in inputsrc:
37# if (len(entries)<4 or (entries[0]!="O" and entries[0]!="Si" and entries[0]!="Ca")):
38# continue
39# entries = line.split()
40# for n in range(3):
41# x[n] = float(entries[offsetXYZ+n])
42# if x[n] > max[n]:
43# max[n] = x[n]
44# if x[n] < min[n]:
45# min[n] = x[n]
46# srcAtoms.append([int(entries[offsetID]), x[0], x[1], x[2]])
47# nr+=1
48#elif "pdb" in suffix:
49print "Scanning source PDB file "+sys.argv[1]+"."
50for line in inputsrc:
51 if "#" in line:
52 continue
53 if "END" in line:
54 break
55 if "ATOM" in line:
56 entries = line.split()
57 for n in range(3):
58 x[n] = float(entries[offsetXYZ+n])
59 if x[n] > max[n]:
60 max[n] = x[n]
61 if x[n] < min[n]:
62 min[n] = x[n]
63 srcAtoms.append([int(entries[offsetID]), x[0], x[1], x[2]])
64 nr+=1
65#else:
66# print "Cannot recognize input source file format: $suffix."
67# sys.exit(1)
68
69inputsrc.close()
70print "Scanned "+str(nr)+" source atoms."
71
72print "Scanning destination XYZ file "+sys.argv[2]+"."
73destAtoms = []
74nr = 0
75for line in inputdestPDB:
76 entries = line.split()
77 if (len(entries)<4 or (entries[0]!="O" and entries[0]!="Si" and entries[0]!="Ca")):
78 continue
79 for n in range(3):
80 x[n] = float(entries[1+n])
81 if x[n] > max[n]:
82 x[n]-=max[n]
83 if x[n] < min[n]:
84 x[n]+=max[n]
85 destAtoms.append([nr, x[0], x[1], x[2]])
86 nr+=1
87inputdestPDB.close()
88
89#nr=0
90#for line in inputdestPDB:
91# if "#" in line:
92# continue
93# if "END" in line:
94# break
95# if "ATOM" in line:
96# entries = line.split()
97# for n in range(3):
98# x[n] = float(entries[offsetXYZ+n])
99# if x[n] > max[n]:
100# max[n] = x[n]
101# if x[n] < min[n]:
102# min[n] = x[n]
103# destAtoms.append([int(entries[offsetID]), x[0], x[1], x[2]])
104# nr+=1
105#inputdestPDB.close()
106#
107print "Scanned "+str(nr)+" destination atoms."
108
109# 2. create Linked Cell with minimum distance box length
110print "Found Box bounds [%f,%f]x[%f,%f]x[%f,%f]." % (min[0],max[0],min[1],max[1],min[2],max[2])
111for i in range(3): # shift by minimum if below zero
112 if min[i] < 0:
113 max[i]-=min[i]
114 else:
115 min[i]=0
116cells_x=int(math.ceil(float(max[0])/CUTOFF))+1
117cells_y=int(math.ceil(float(max[1])/CUTOFF))+1
118cells_z=int(math.ceil(float(max[2])/CUTOFF))+1
119print "Number of cells in each axis direction (%f,%f,%f)." % (cells_x, cells_y, cells_z)
120
121# 3. put each atom into its cell, lists may contain multiple atoms, mark src(0) or dest (1)
122cell=[]
123for i in range(cells_x):
124 cell.append([])
125 for j in range(cells_y):
126 cell[i].append([])
127 for k in range(cells_z):
128 cell[i][j].append([0])
129for i in range(len(srcAtoms)):
130 atom = srcAtoms[i]
131 for n in range(3):
132 x[n] = int(math.floor(float(atom[1+n])/CUTOFF))
133 try:
134 cell[x[0]][x[1]][x[2]][0]+=1
135 cell[x[0]][x[1]][x[2]].append([0,i]) # 0 means src
136 except IndexError:
137 print "!IndexError with indices "+str(x[0])+" "+str(x[1])+" "+str(x[2])+" on source atom "+str(atom)+"."
138 #print "Source atom "+str(i)+" goes to cell "+str(x[0])+","+str(x[1])+","+str(x[2])+"."
139for i in range(len(destAtoms)):
140 atom = destAtoms[i]
141 for n in range(3):
142 x[n] = int(math.floor(float(atom[1+n])/CUTOFF))
143 try:
144 cell[x[0]][x[1]][x[2]][0]+=1
145 cell[x[0]][x[1]][x[2]].append([1,i]) # 1 means dest
146 except IndexError:
147 print "!IndexError with indices "+str(x[0])+" "+str(x[1])+" "+str(x[2])+" on destination atom "+str(atom)+"."
148 #print "Destination atom "+str(i)+" goes to cell "+str(x[0])+","+str(x[1])+","+str(x[2])+"."
149
150# 4. go through each cell and match (src, dest)-pairs by closest distance, warn if greater than EPSILON
151srcMatches=0
152destMatches=0
153Map = {}
154i=-1
155j=-1
156k=-1
157l=-1
158e=-1
159r=-1
160t=-1
161m=-1
162for i in range(cells_x):
163 for j in range(cells_y):
164 for k in range(cells_z):
165
166 #go through every atom in cell
167 try:
168 for l in range(1, cell[i][j][k][0]+1):
169 if cell[i][j][k][l][0] != 0: # skip if it's not a src atom
170 continue
171 atom1=cell[i][j][k][l][1]
172 #print "Current source atom is "+str(srcAtoms[atom1][0])+"."
173 currentPair=[atom1,-1]
174 oldDist=0.
175 # go through cell and all lower neighbours
176 for e in range(i-1,i+2):
177 #if on boarder continue periodic
178 #if e>cells_x-1:
179 # e=e-cells_x
180 if (e < 0) or (e >= cells_x):
181 continue
182 for r in range(j-1,j+2):
183 #if on boarder continue periodic
184 #if r>cells_y-1:
185 # r=r-cells_y
186 if (r < 0) or (r >= cells_y):
187 continue
188 for t in range(k-1,k+2):
189 #if on boarder continue periodic
190 #if t>cells_z-1:
191 # t=t-cells_z
192 if (t < 0) or (t >= cells_z):
193 continue
194 #go through all atoms in cell
195 for m in range(1, cell[e][r][t][0]+1):
196 if cell[e][r][t][m][0] != 1: # skip if it's not a dest atom
197 continue
198 atom2=cell[e][r][t][m][1]
199 #print "Current destination atom is "+str(destAtoms[atom2][0])+"."
200 dist=0
201 tmp=0
202 for n in range(3):
203 tmp = srcAtoms[atom1][1+n] - destAtoms[atom2][1+n]
204 dist += tmp*tmp
205 #print "Squared distance between the two is "+str(dist)+"."
206 if ((oldDist > dist) or ((currentPair[1] == -1) and (dist<EPSILON))):
207 currentPair[1] = atom2
208 oldDist = dist
209 if currentPair[1] == -1:
210 #wrerr("Could not find a suitable partner for srcAtom (%d,%d)!\n" % (srcAtoms[currentPair[0]][0],currentPair[1]))
211 Map[ srcAtoms[currentPair[0]][0] ] = currentPair[1]
212 else:
213 #print "Found a suitable partner for srcAtom "+str(srcAtoms[currentPair[0]][0])+","+str(destAtoms[currentPair[1]][0])+"."
214 srcMatches+=1
215 destMatches+=1
216 Map[ srcAtoms[currentPair[0]][0] ] = destAtoms[currentPair[1]][0]
217 except IndexError:
218 wrerr("Index Error: (%d,%d,%d)[%d] and (%d,%d,%d)[%d]\n" % (i,j,k,l,e,r,t,m))
219 break
220
221# 5. print the listing
222print "We have "+str(srcMatches)+" matching atoms."
223print "Mapping is:"
224for key in Map:
225 if Map[key] != -1:
226 print str(key)+" -> "+str(Map[key])
227#print Map # work also
228
229# 6. use the listing to rewrite the dbond file, store under given filename
230line=inputsrcDBOND.readline()
231output.write(line)
232for line in inputsrcDBOND:
233 if "#" in line:
234 continue
235 entries=line.split()
236 flag=0
237 for n in range(len(entries)):
238 try:
239 if Map[ int(entries[n]) ] == -1:
240 flag=1
241 except KeyError:
242 print "entries["+str(n)+"] = "+str(int(entries[n]))+" does not exist in Map."
243 flag=1
244 if flag==1:
245 continue
246 for n in range(len(entries)):
247 output.write("%d\t" % (Map[ int(entries[n]) ]))
248 output.write("\n")
249output.close()
250
251# exit
Note: See TracBrowser for help on using the repository browser.