Context Navigation

ReMapDBondFile.py.in@ e1a46d

Visit:

Last change on this file since e1a46d was e1a46d, checked in by Frederik Heber <heber@…>, 16 years ago
All of my python script put into ESPACK and adapted with @PYTHON@ and so on.
Property mode set to `100755`
File size: 7.3 KB

Line
1	#!@PYTHON@
2	#
3	# Takes two pdb file and a dbond file, matches the coordinates and thus creates a mapping from old ids to new ids.
4
5	import sys, random, math, re
6	wrerr=sys.stderr.write
7	wrout=sys.stdout.write
8
9	# check arguments
10	if len(sys.argv) < 5:
11	print "Usage: "+sys.argv[0]+" <src{PDB/XYZ}file> <destXYZfile> <offsetID> <offsetXYZ> <srcDBONDfile> [destDBONDfile]"
12	sys.exit(1)
13
14	EPSILON=1e-3
15	CUTOFF=2.
16	inputsrc = open(sys.argv[1], "r")
17	inputdestPDB = open(sys.argv[2], "r")
18	inputsrcDBOND = open(sys.argv[5], "r")
19	offsetID=int(sys.argv[3])
20	offsetXYZ=int(sys.argv[4])
21	entries = sys.argv[1].split(".")
22	suffix = entries[-1]
23	if len(sys.argv) > 6:
24	output = open(sys.argv[6],"w")
25	else:
26	output = open(sys.argv[5]+".new", "w")
27
28	# 1. first parse both PDB files into arrays (id, element, xyz) , therewhile scan BoundaryBoxes
29	max = [ 0., 0., 0. ]
30	min = [ 0., 0., 0. ]
31	x = [ 0., 0., 0. ]
32	nr=0
33	srcAtoms = []
34	#if "xyz" in suffix:
35	# print "Scanning source XYZ file "+sys.argv[1]+"."
36	# for line in inputsrc:
37	# if (len(entries)<4 or (entries[0]!="O" and entries[0]!="Si" and entries[0]!="Ca")):
38	# continue
39	# entries = line.split()
40	# for n in range(3):
41	# x[n] = float(entries[offsetXYZ+n])
42	# if x[n] > max[n]:
43	# max[n] = x[n]
44	# if x[n] < min[n]:
45	# min[n] = x[n]
46	# srcAtoms.append([int(entries[offsetID]), x[0], x[1], x[2]])
47	# nr+=1
48	#elif "pdb" in suffix:
49	print "Scanning source PDB file "+sys.argv[1]+"."
50	for line in inputsrc:
51	if "#" in line:
52	continue
53	if "END" in line:
54	break
55	if "ATOM" in line:
56	entries = line.split()
57	for n in range(3):
58	x[n] = float(entries[offsetXYZ+n])
59	if x[n] > max[n]:
60	max[n] = x[n]
61	if x[n] < min[n]:
62	min[n] = x[n]
63	srcAtoms.append([int(entries[offsetID]), x[0], x[1], x[2]])
64	nr+=1
65	#else:
66	# print "Cannot recognize input source file format: $suffix."
67	# sys.exit(1)
68
69	inputsrc.close()
70	print "Scanned "+str(nr)+" source atoms."
71
72	print "Scanning destination XYZ file "+sys.argv[2]+"."
73	destAtoms = []
74	nr = 0
75	for line in inputdestPDB:
76	entries = line.split()
77	if (len(entries)<4 or (entries[0]!="O" and entries[0]!="Si" and entries[0]!="Ca")):
78	continue
79	for n in range(3):
80	x[n] = float(entries[1+n])
81	if x[n] > max[n]:
82	x[n]-=max[n]
83	if x[n] < min[n]:
84	x[n]+=max[n]
85	destAtoms.append([nr, x[0], x[1], x[2]])
86	nr+=1
87	inputdestPDB.close()
88
89	#nr=0
90	#for line in inputdestPDB:
91	# if "#" in line:
92	# continue
93	# if "END" in line:
94	# break
95	# if "ATOM" in line:
96	# entries = line.split()
97	# for n in range(3):
98	# x[n] = float(entries[offsetXYZ+n])
99	# if x[n] > max[n]:
100	# max[n] = x[n]
101	# if x[n] < min[n]:
102	# min[n] = x[n]
103	# destAtoms.append([int(entries[offsetID]), x[0], x[1], x[2]])
104	# nr+=1
105	#inputdestPDB.close()
106	#
107	print "Scanned "+str(nr)+" destination atoms."
108
109	# 2. create Linked Cell with minimum distance box length
110	print "Found Box bounds [%f,%f]x[%f,%f]x[%f,%f]." % (min[0],max[0],min[1],max[1],min[2],max[2])
111	for i in range(3): # shift by minimum if below zero
112	if min[i] < 0:
113	max[i]-=min[i]
114	else:
115	min[i]=0
116	cells_x=int(math.ceil(float(max[0])/CUTOFF))+1
117	cells_y=int(math.ceil(float(max[1])/CUTOFF))+1
118	cells_z=int(math.ceil(float(max[2])/CUTOFF))+1
119	print "Number of cells in each axis direction (%f,%f,%f)." % (cells_x, cells_y, cells_z)
120
121	# 3. put each atom into its cell, lists may contain multiple atoms, mark src(0) or dest (1)
122	cell=[]
123	for i in range(cells_x):
124	cell.append([])
125	for j in range(cells_y):
126	cell[i].append([])
127	for k in range(cells_z):
128	cell[i][j].append([0])
129	for i in range(len(srcAtoms)):
130	atom = srcAtoms[i]
131	for n in range(3):
132	x[n] = int(math.floor(float(atom[1+n])/CUTOFF))
133	try:
134	cell[x[0]][x[1]][x[2]][0]+=1
135	cell[x[0]][x[1]][x[2]].append([0,i]) # 0 means src
136	except IndexError:
137	print "!IndexError with indices "+str(x[0])+" "+str(x[1])+" "+str(x[2])+" on source atom "+str(atom)+"."
138	#print "Source atom "+str(i)+" goes to cell "+str(x[0])+","+str(x[1])+","+str(x[2])+"."
139	for i in range(len(destAtoms)):
140	atom = destAtoms[i]
141	for n in range(3):
142	x[n] = int(math.floor(float(atom[1+n])/CUTOFF))
143	try:
144	cell[x[0]][x[1]][x[2]][0]+=1
145	cell[x[0]][x[1]][x[2]].append([1,i]) # 1 means dest
146	except IndexError:
147	print "!IndexError with indices "+str(x[0])+" "+str(x[1])+" "+str(x[2])+" on destination atom "+str(atom)+"."
148	#print "Destination atom "+str(i)+" goes to cell "+str(x[0])+","+str(x[1])+","+str(x[2])+"."
149
150	# 4. go through each cell and match (src, dest)-pairs by closest distance, warn if greater than EPSILON
151	srcMatches=0
152	destMatches=0
153	Map = {}
154	i=-1
155	j=-1
156	k=-1
157	l=-1
158	e=-1
159	r=-1
160	t=-1
161	m=-1
162	for i in range(cells_x):
163	for j in range(cells_y):
164	for k in range(cells_z):
165
166	#go through every atom in cell
167	try:
168	for l in range(1, cell[i][j][k][0]+1):
169	if cell[i][j][k][l][0] != 0: # skip if it's not a src atom
170	continue
171	atom1=cell[i][j][k][l][1]
172	#print "Current source atom is "+str(srcAtoms[atom1][0])+"."
173	currentPair=[atom1,-1]
174	oldDist=0.
175	# go through cell and all lower neighbours
176	for e in range(i-1,i+2):
177	#if on boarder continue periodic
178	#if e>cells_x-1:
179	# e=e-cells_x
180	if (e < 0) or (e >= cells_x):
181	continue
182	for r in range(j-1,j+2):
183	#if on boarder continue periodic
184	#if r>cells_y-1:
185	# r=r-cells_y
186	if (r < 0) or (r >= cells_y):
187	continue
188	for t in range(k-1,k+2):
189	#if on boarder continue periodic
190	#if t>cells_z-1:
191	# t=t-cells_z
192	if (t < 0) or (t >= cells_z):
193	continue
194	#go through all atoms in cell
195	for m in range(1, cell[e][r][t][0]+1):
196	if cell[e][r][t][m][0] != 1: # skip if it's not a dest atom
197	continue
198	atom2=cell[e][r][t][m][1]
199	#print "Current destination atom is "+str(destAtoms[atom2][0])+"."
200	dist=0
201	tmp=0
202	for n in range(3):
203	tmp = srcAtoms[atom1][1+n] - destAtoms[atom2][1+n]
204	dist += tmp*tmp
205	#print "Squared distance between the two is "+str(dist)+"."
206	if ((oldDist > dist) or ((currentPair[1] == -1) and (dist<EPSILON))):
207	currentPair[1] = atom2
208	oldDist = dist
209	if currentPair[1] == -1:
210	#wrerr("Could not find a suitable partner for srcAtom (%d,%d)!\n" % (srcAtoms[currentPair[0]][0],currentPair[1]))
211	Map[ srcAtoms[currentPair[0]][0] ] = currentPair[1]
212	else:
213	#print "Found a suitable partner for srcAtom "+str(srcAtoms[currentPair[0]][0])+","+str(destAtoms[currentPair[1]][0])+"."
214	srcMatches+=1
215	destMatches+=1
216	Map[ srcAtoms[currentPair[0]][0] ] = destAtoms[currentPair[1]][0]
217	except IndexError:
218	wrerr("Index Error: (%d,%d,%d)[%d] and (%d,%d,%d)[%d]\n" % (i,j,k,l,e,r,t,m))
219	break
220
221	# 5. print the listing
222	print "We have "+str(srcMatches)+" matching atoms."
223	print "Mapping is:"
224	for key in Map:
225	if Map[key] != -1:
226	print str(key)+" -> "+str(Map[key])
227	#print Map # work also
228
229	# 6. use the listing to rewrite the dbond file, store under given filename
230	line=inputsrcDBOND.readline()
231	output.write(line)
232	for line in inputsrcDBOND:
233	if "#" in line:
234	continue
235	entries=line.split()
236	flag=0
237	for n in range(len(entries)):
238	try:
239	if Map[ int(entries[n]) ] == -1:
240	flag=1
241	except KeyError:
242	print "entries["+str(n)+"] = "+str(int(entries[n]))+" does not exist in Map."
243	flag=1
244	if flag==1:
245	continue
246	for n in range(len(entries)):
247	output.write("%d\t" % (Map[ int(entries[n]) ]))
248	output.write("\n")
249	output.close()
250
251	# exit

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format