"""
.. codeauthor:: Niklaus Johner <niklaus.johner@a3.epfl.ch>
This module contains some helper functions for reading and writing files
"""
from ost import *
import math
# Names exported by a star-import of this module.
__all__ = (
    'ReadMissingResiduesFromPDB',
    'ReadSymmetryFromPDB',
    'ParsePDBForCubicPhase',
    'ReadUnitCellFromPDB',
    'WriteListOfListsInLines',
    'WriteListOfListsInColumns',
    'WriteFloatList',
    'ReadFloatListFile',
    'ReadFile',
)
def ReadMissingResiduesFromPDB(filename):
    """
    Read the residues listed in the REMARK 465 table of a PDB file.

    Parsing starts after the column header line (``M RES C SSSEQI``) and stops
    at the first line that is not a ``REMARK 465`` line.

    :param filename: Path of the PDB file to read.
    :return: List of ``(chain, residue_number, residue_name)`` tuples, taken
      from the 4th, 5th (converted to ``int``) and 3rd whitespace-separated
      fields of each table line. Lines that cannot be parsed are reported on
      standard output and skipped.
    """
    header_tokens = ['REMARK', '465', 'M', 'RES', 'C', 'SSSEQI']
    missing_res_list = []
    in_table = False
    # The context manager closes the file even if parsing raises.
    with open(filename, 'r') as pdb_file:
        for line in pdb_file:
            fields = line.split()
            # Compare tokens instead of raw text so that the header is found
            # whatever the number of blanks separating its words.
            if fields[:6] == header_tokens:
                in_table = True
                continue
            if not in_table:
                continue
            if not line.startswith('REMARK 465'):
                in_table = False
                continue
            try:
                missing_res_list.append((fields[3], int(fields[4]), fields[2]))
            except (IndexError, ValueError):
                # Too few fields or a non-integer residue number.
                print('could not add residue for line: %s' % line)
    return missing_res_list
def ReadSymmetryFromPDB(file):
    """
    Read the ``REMARK 290 SMTRYn`` rows of an open PDB file as 4x4 matrices.

    The file is rewound and scanned until the first line whose first field is
    ``ATOM``. Rows ``SMTRY1``, ``SMTRY2`` and ``SMTRY3`` are consumed in that
    order; fields 5 to 8 of each row are the four matrix coefficients. Every
    completed triplet is closed with the row ``0 0 0 1``.

    :param file: Open, seekable file object of a PDB file.
    :return: List of :class:`geom.Mat4`, one per complete SMTRY triplet.
    """
    file.seek(0)
    transformation_list = []
    expected_row = 1
    coefficients = []
    for line in file:
        fields = line.split()
        if not fields:
            # Blank lines carry no record (and have no first field to test).
            continue
        if fields[0] == 'ATOM':
            break
        if fields[:3] != ['REMARK', '290', 'SMTRY{0}'.format(expected_row)]:
            continue
        # Parse the whole row before touching the accumulator, so a malformed
        # row can never leave partial values behind.
        try:
            row = [float(el) for el in fields[4:8]]
        except ValueError:
            continue
        if len(row) != 4:
            continue
        if expected_row == 1:
            coefficients = []
        coefficients.extend(row)
        if expected_row < 3:
            expected_row += 1
            continue
        coefficients.extend([0.0, 0.0, 0.0, 1.0])
        transformation_list.append(geom.Mat4(*coefficients))
        expected_row = 1
    return transformation_list
def FindLines(file, search_list, break_list):
    """
    Collect the lines of *file* whose leading whitespace-separated fields are
    equal, in order, to the elements of *search_list*.

    The file is rewound first. Empty lines are skipped, and scanning stops at
    the first line whose first field is contained in *break_list*.

    :param file: Open, seekable file object.
    :param search_list: Fields a line has to start with.
    :param break_list: First-field values that end the scan.
    :return: List of the matching lines, unmodified.
    """
    file.seek(0)
    wanted = list(search_list)
    n_wanted = len(search_list)
    matches = []
    for raw_line in file:
        tokens = raw_line.split()
        if not tokens:
            continue
        if tokens[0] in break_list:
            break
        if len(tokens) >= n_wanted and tokens[:n_wanted] == wanted:
            matches.append(raw_line)
    return matches
def FindBioUnitTransformations(file, biounit_id=1):
    """
    Extract the BIOMT transformations of one biomolecule from the REMARK 350
    lines of an open PDB file.

    The REMARK 350 lines located before the first ``ATOM`` line are gathered
    with :func:`FindLines`. The lines following the ``BIOMOLECULE: <id>``
    header are kept up to the header of biomolecule ``biounit_id + 1``.

    :param file: Open, seekable file object of a PDB file.
    :param biounit_id: Number of the biomolecule to look for.
    :return: Tuple ``(Tl, cnames)``. ``Tl`` is a list of
      :class:`geom.Transform`, one per BIOMT1/BIOMT2/BIOMT3 triplet. ``cnames``
      is the list of chain names read from the last
      ``APPLY THE FOLLOWING TO CHAINS`` line, or an empty list if there is
      none. When a malformed BIOMT record is met, a message is printed and the
      transformations read so far are returned.
    """
    import re  # Local import: only this function needs regular expressions.

    remark_lines = FindLines(file, ['REMARK', '350'], ['ATOM'])

    # The negative look-ahead rejects longer numbers: a plain substring test
    # would let "BIOMOLECULE:1" also match "BIOMOLECULE:10".
    start_tag = re.compile(
        re.escape('BIOMOLECULE:{0}'.format(biounit_id)) + r'(?!\d)')
    stop_tag = re.compile(
        re.escape('BIOMOLECULE:{0}'.format(biounit_id + 1)) + r'(?!\d)')

    lines = []
    for i, l in enumerate(remark_lines):
        # Blanks are removed so "BIOMOLECULE: 1" and "BIOMOLECULE:1" both match.
        if not start_tag.search(''.join(l.split())):
            continue
        for l2 in remark_lines[i + 1:]:
            if stop_tag.search(''.join(l2.split())):
                break
            lines.append(l2)

    cnames = None
    for line in lines:
        if 'APPLY THE FOLLOWING TO CHAINS' in line:
            parts = line.split(':')
            if len(parts) > 1:
                cnames = [el.strip() for el in parts[1].split(',')]
    if cnames is None:
        print('Chains to apply transformations to are not defined in record')
        cnames = []

    Tl = []
    for i, line in enumerate(lines):
        sl = line.split()
        if len(sl) < 6 or sl[2] != 'BIOMT1':
            continue
        il = []
        for j in range(3):
            # A triplet cut short by the end of the record, a row with too few
            # fields or a non-numeric coefficient are all reported the same way.
            row = lines[i + j].split() if i + j < len(lines) else []
            values = None
            if len(row) >= 8 and row[2] == 'BIOMT{0}'.format(j + 1):
                try:
                    values = [float(el) for el in row[4:8]]
                except ValueError:
                    values = None
            if values is None:
                print('problem in BIOMT record')
                return Tl, cnames
            il.extend(values)
        il.extend([0.0, 0.0, 0.0, 1.0])
        T = geom.Transform()
        T.SetMatrix(geom.Mat4(*il))
        Tl.append(T)
    return Tl, cnames
def ParsePDBForCubicPhase(file):
    """
    Tell whether the REMARK 280 text of an open PDB file mentions a lipidic
    cubic phase.

    The file is rewound and scanned until the first line whose first field is
    ``ATOM``. Only the REMARK 280 lines located after the second empty
    ``REMARK 280`` line (and before a third one) are considered. Their words
    are joined with single blanks and searched for ``LIPIDIC MESOPHASE``,
    ``LIPIDIC CUBIC PHASE`` or ``LIPID CUBIC PHASE``.

    :param file: Open, seekable file object of a PDB file.
    :return: ``True`` if one of the phrases is found, ``False`` otherwise.
    """
    file.seek(0)
    search_strings = ('LIPIDIC MESOPHASE', 'LIPIDIC CUBIC PHASE',
                      'LIPID CUBIC PHASE')
    empty_remarks = 0
    words = []
    for line in file:
        fields = line.split()
        if not fields:
            # Blank lines carry no record (and have no first field to test).
            continue
        if fields[0] == 'ATOM':
            break
        if fields[:2] != ['REMARK', '280']:
            continue
        if len(fields) == 2:
            empty_remarks += 1
            continue
        if empty_remarks == 2:
            words.extend(fields[2:])
    # Joining the words lets a phrase that is split over two lines match.
    text = ' '.join(words)
    return any(phrase in text for phrase in search_strings)
def ReadUnitCellFromPDB(file):
    """
    Read the unit cell from the first ``CRYST1`` line of an open PDB file.

    The file is rewound, and the fields found are printed on standard output.

    :param file: Open, seekable file object of a PDB file.
    :return: List made of fields 2 to 7 of the ``CRYST1`` line: the first
      three are converted to ``float``, the following ones are converted to
      ``float`` and multiplied by ``pi/180``. The string ``'na'`` is returned
      when no line starts with ``CRYST1``.
    """
    file.seek(0)
    for record in file:
        if record[:6] != 'CRYST1':
            continue
        fields = record.split()
        print('Cryst1 found: %s' % (fields,))
        return [
            float(value) if position < 3 else float(value) * math.pi / 180
            for position, value in enumerate(fields[1:7])
        ]
    return 'na'
def WriteListOfListsInLines(column_titles, ll, filename, separator=','):
    """
    Write a list of lists to a text file, one inner list per line.

    The first line holds the column titles. The elements of each inner list
    are converted with ``str`` and joined with *separator*.

    :param column_titles: List of strings written as first line.
    :param ll: List of lists; each inner list becomes one line.
    :param filename: Path of the file to write (overwritten if it exists).
    :param separator: String placed between the elements of a line.
    :return: ``None``
    """
    # The context manager closes the file even if a write or a conversion fails.
    with open(filename, 'w') as f:
        f.write(separator.join(column_titles) + '\n')
        for l in ll:
            f.write(separator.join([str(el) for el in l]) + '\n')
    return
def WriteListOfListsInColumns(column_titles, ll, filename, separator=','):
    """
    Write a list of lists to a text file, one inner list per column.

    The first line holds the column titles. Line *i* of the data holds
    element *i* of every inner list, converted with ``str`` and joined with
    *separator*. The number of data lines is the length of the first inner
    list; an empty *ll* produces a file containing only the title line.

    :param column_titles: List of strings written as first line.
    :param ll: List of lists; each inner list becomes one column.
    :param filename: Path of the file to write (overwritten if it exists).
    :param separator: String placed between the elements of a line.
    :return: ``None``
    """
    # Without columns there is no first list to take the row count from.
    n_rows = len(ll[0]) if ll else 0
    # The context manager closes the file even if a write or a conversion fails.
    with open(filename, 'w') as f:
        f.write(separator.join(column_titles) + '\n')
        for i in range(n_rows):
            f.write(separator.join([str(el[i]) for el in ll]) + '\n')
    return
def WriteFloatList(fl, filename, separator=',', column_title=None):
    """
    Write the elements of a list to a text file, one element per line.

    :param fl: Iterable whose elements are written with ``str``.
    :param filename: Path of the file to write (overwritten if it exists).
    :param separator: Not used by this function; kept so that existing calls
      passing it keep working.
    :param column_title: Optional string written as first line. Nothing is
      written for ``None`` or an empty string.
    :return: ``None``
    """
    # The context manager closes the file even if a write or a conversion fails.
    with open(filename, 'w') as f:
        if column_title:
            f.write(column_title + '\n')
        for el in fl:
            f.write(str(el) + '\n')
    return
def ReadFloatListFile(filename, separator=',', column_titles=True, comment=None):
    """
    Read columns of floating point numbers from a text file.

    :param filename: Path of the file to read.
    :param separator: String separating the columns.
    :param column_titles: If ``True`` the first line holds the column titles,
      which become the keys of the returned dictionary. If ``False`` the first
      line is data and the keys are the column indices ``0, 1, ...``.
    :param comment: Optional prefix; lines starting with it are skipped.
    :return: Dictionary mapping each column key to a ``FloatList`` of the
      values of that column. An empty file gives an empty dictionary.
    :raises ValueError: If a field cannot be converted to ``float``.
    """
    float_list_dict = {}
    # The context manager closes the file, which was previously left open.
    with open(filename, 'r') as f:
        # The next() builtin works on Python 2.6+ and 3; the default avoids a
        # StopIteration on an empty file.
        first_line = next(f, None)
        if first_line is None:
            return float_list_dict
        titles = first_line.rstrip().split(separator)
        if column_titles:
            for el in titles:
                float_list_dict[el] = FloatList()
        else:
            titles = list(range(len(titles)))
            for i in titles:
                float_list_dict[i] = FloatList()
            # The first line is data in this case: read it again.
            f.seek(0)
        for l in f:
            if comment and l.startswith(comment):
                continue
            if not l.strip():
                # Blank lines (typically at the end of the file) hold no values.
                continue
            sl = l.split(separator)
            for el, title in zip(sl, titles):
                float_list_dict[title].append(float(el))
    return float_list_dict
def ReadFile(filename, format, separator=',', column_titles=True):
    """
    Read the columns of a text file and convert them to the requested types.

    :param filename: Path of the file to read.
    :param format: Sequence of type codes, one per column, in column order:
      ``'float'``/``'f'`` gives a ``FloatList``, ``'int'``/``'i'`` an
      ``IntList`` and ``'bool'``/``'b'`` a list of ``bool``. Columns with any
      other code, or without code, stay lists of strings.
    :param separator: String separating the columns.
    :param column_titles: If ``True`` the first line holds the column titles,
      which become the keys of the returned dictionary. If ``False`` the keys
      are the indices ``0 .. len(format)-1``.
    :return: Dictionary mapping each column key to its list of values. An
      empty file read with ``column_titles=True`` gives an empty dictionary.
    :raises ValueError: If a field of a float or int column cannot be
      converted.
    """
    # bool() of any non-empty string is True, so the text has to be inspected:
    # these spellings (case-insensitive, blanks ignored) are read as False.
    false_strings = ('', '0', 'false')
    columns = {}
    # The context manager closes the file, which was previously left open.
    with open(filename, 'r') as f:
        if column_titles:
            header = next(f, None)
            if header is None:
                return columns
            titles = header.rstrip().split(separator)
        else:
            titles = list(range(len(format)))
        for el in titles:
            columns[el] = []
        for l in f:
            # Drop the line terminator only, so that it does not end up in the
            # last column and trailing separators keep their meaning.
            l = l.rstrip('\r\n')
            if not l:
                continue
            for el, title in zip(l.split(separator), titles):
                columns[title].append(el)
    for title, fmt in zip(titles, format):
        if fmt in ('float', 'f'):
            columns[title] = FloatList([float(el) for el in columns[title]])
        elif fmt in ('int', 'i'):
            columns[title] = IntList([int(el) for el in columns[title]])
        elif fmt in ('bool', 'b'):
            columns[title] = [el.strip().lower() not in false_strings
                              for el in columns[title]]
    return columns