#!/usr/bin/python # # Takes two pdb file and a dbond file, matches the coordinates and thus creates a mapping from old ids to new ids. import sys, random, math, re wrerr=sys.stderr.write wrout=sys.stdout.write # check arguments if len(sys.argv) < 5: print "Usage: "+sys.argv[0]+" [destDBONDfile]" sys.exit(1) EPSILON=1e-3 CUTOFF=2. inputsrcPDB = open(sys.argv[1], "r") inputdestPDB = open(sys.argv[2], "r") inputsrcDBOND = open(sys.argv[5], "r") offsetID=int(sys.argv[3]) offsetXYZ=int(sys.argv[4]) if len(sys.argv) > 6: output = open(sys.argv[6],"w") else: output = open(sys.argv[5]+".new", "w") # 1. first parse both PDB files into arrays (id, element, xyz) , therewhile scan BoundaryBoxes max = [ 0., 0., 0. ] min = [ 0., 0., 0. ] x = [ 0., 0., 0. ] print "Scanning source PDB file"+sys.argv[1]+"." srcAtoms = [] for line in inputsrcPDB: if "#" in line: continue if "END" in line: break if "ATOM" in line: entries = line.split() for n in range(3): x[n] = float(entries[offsetXYZ+n]) if x[n] > max[n]: max[n] = x[n] if x[n] < min[n]: min[n] = x[n] srcAtoms.append([int(entries[offsetID]), x[0], x[1], x[2]]) inputsrcPDB.close() print "Scanning destination XYZ file"+sys.argv[2]+"." destAtoms = [] index = 0 for line in inputdestPDB: entries = line.split() if (len(entries)<1 or (entries[0]!="O" and entries[0]!="Si" and entries[0]!="Ca")): continue for n in range(3): x[n] = float(entries[1+n]) if x[n] > max[n]: x[n]-=max[n] if x[n] < min[n]: x[n]+=max[n] destAtoms.append([index, x[0], x[1], x[2]]) index+=1 inputdestPDB.close() # 2. create Linked Cell with minimum distance box length print "Found Box bounds [%f,%f]x[%f,%f]x[%f,%f]." % (min[0],max[0],min[1],max[1],min[2],max[2]) for i in range(3): # shift by minimum if below zero if min[i] < 0: max[i]-=min[i] else: min[i]=0 cells_x=int(math.ceil(float(max[0])/CUTOFF)) cells_y=int(math.ceil(float(max[1])/CUTOFF)) cells_z=int(math.ceil(float(max[2])/CUTOFF)) print "Number of cells in each axis direction (%f,%f,%f)." % (cells_x, cells_y, cells_z) # 3. put each atom into its cell, lists may contain multiple atoms, mark src(0) or dest (1) cell=[] for i in range(cells_x): cell.append([]) for j in range(cells_y): cell[i].append([]) for k in range(cells_z): cell[i][j].append([0]) for i in range(len(srcAtoms)): atom = srcAtoms[i] print atom for n in range(3): x[n] = int(math.floor(float(atom[1+n])/CUTOFF)) if cells_x ==x[0]: x[0]-=1 if cells_y ==x[1]: x[1]-=1 if cells_z ==x[2]: x[2]-=1 print x cell[x[0]][x[1]][x[2]][0]+=1 cell[x[0]][x[1]][x[2]].append([0,i]) # 0 means src print "Source atom "+str(i)+" goes to cell "+str(x[0])+","+str(x[1])+","+str(x[2])+"." for i in range(len(destAtoms)): atom = destAtoms[i] for n in range(3): x[n] = int(math.floor(float(atom[1+n])/CUTOFF)) cell[x[0]][x[1]][x[2]][0]+=1 cell[x[0]][x[1]][x[2]].append([1,i]) # 1 means dest print "Destination atom "+str(i)+" goes to cell "+str(x[0])+","+str(x[1])+","+str(x[2])+"." # 4. go through each cell and match (src, dest)-pairs by closest distance, warn if greater than EPSILON srcMatches=0 destMatches=0 Map = {} i=-1 j=-1 k=-1 l=-1 e=-1 r=-1 t=-1 m=-1 for i in range(cells_x): for j in range(cells_y): for k in range(cells_z): #go through every atom in cell try: for l in range(1, cell[i][j][k][0]+1): if cell[i][j][k][l][0] != 0: # skip if it's not a src atom continue atom1=cell[i][j][k][l][1] print "Current source atom is "+str(srcAtoms[atom1][0])+"." currentPair=[atom1,-1] oldDist=0. # go through cell and all lower neighbours for e in range(i-1,i+2): #if on boarder continue periodic #if e>cells_x-1: # e=e-cells_x if (e < 0) or (e >= cells_x): continue for r in range(j-1,j+2): #if on boarder continue periodic #if r>cells_y-1: # r=r-cells_y if (r < 0) or (r >= cells_y): continue for t in range(k-1,k+2): #if on boarder continue periodic #if t>cells_z-1: # t=t-cells_z if (t < 0) or (t >= cells_z): continue #go through all atoms in cell for m in range(1, cell[e][r][t][0]+1): if cell[e][r][t][m][0] != 1: # skip if it's not a dest atom continue atom2=cell[e][r][t][m][1] print "Current destination atom is "+str(destAtoms[atom2][0])+"." dist=0 tmp=0 for n in range(3): tmp = srcAtoms[atom1][1+n] - destAtoms[atom2][1+n] dist += tmp*tmp print "Squared distance between the two is "+str(dist)+"." if ((oldDist > dist) or ((currentPair[1] == -1) and (dist "+str(Map[key]) #print Map # work also # 6. use the listing to rewrite the dbond file, store under given filename for line in inputsrcDBOND: if "#" in line: continue entries=line.split() flag=0 for n in range(len(entries)): if int(entries[n]) == 0 or Map[ int(entries[n]) ] == -1: flag=1 if flag==1: continue for n in range(len(entries)): output.write("%d\t" % (Map[ int(entries[n]) ])) output.write("\n") output.close() print "We have "+str(srcMatches)+" matching atoms." # exit