#!/usr/bin/env python
# USAGE: python cgenff_charmm2gmx.py DRUG drug.mol2 drug.str charmm36.ff
# Tested with Python 2.7.3 and 2.7.12. Requires numpy and networkx
# The networkx version MUST be in the 1.x series. Tested version: 1.11
# Copyright (C) 2014 E. Prabhu Raman prabhu@outerbanks.umaryland.edu
#
# Modified 11/6/2018 by Justin Lemkul to add lone pair support
# needed for CGenFF >= 4.0 halogens
#
# For help/questions/bug reports, please contact Justin Lemkul jalemkul@vt.edu
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# EXAMPLE: You have a drug-like molecule in drug.mol2 file
# ParamChem returns a CHARMM stream file drug.str with topology and parameters
# INPUT
# The program needs four inputs:
# (1) The first argument (resname) is found in the RESI entry in the CHARMM stream file; for example
# RESI DRUG 0.000 ! here DRUG is the resname
# (2) drug.mol2 is the .mol2 which you supplied to ParamChem
# (3) drug.str contains the topology entry and CGenFF parameters that ParamChem generates for you
# (4) charmm36.ff should contain the CHARMM force-field in GROMACS format
# Download it from: http://mackerell.umaryland.edu/CHARMM_ff_params.html
# OUTPUT
# The program will generate 4 output files ("DRUG" is converted to lowercase and the files are named accordingly):
# (1) drug.itp - contains GROMACS itp
# (2) drug.prm - contains parameters obtained from drug.str which are converted to GROMACS format and units
# (3) drug.top - A Gromacs topology file which incorporates (1) and (2)
# (4) drug_ini.pdb - Coordinates of the molecule obtained from drug.mol2
# The program has been tested only on CHARMM stream files containing topology and parameters of a single molecule.
import string
import re
import sys
import os
import math
import numpy as np
import networkx as nx
#=================================================================================================================
def check_versions(str_filename,ffdoc_filename):
ffver = 0 # CGenFF version in force field directory
strver = 0 # CGenFF version in stream file
f = open(str_filename, 'r')
for line in f.readlines():
if line.startswith("* For use with CGenFF version"):
entry = re.split('\s+', string.lstrip(line))
strver = entry[6]
print "--Version of CGenFF detected in ",str_filename,":",strver
f.close()
f = open(ffdoc_filename, 'r')
for line in f.readlines():
if line.startswith("Parameters taken from CHARMM36 and CGenFF"):
entry = re.split('\s+', string.lstrip(line))
ffver = entry[6]
print "--Version of CGenFF detected in ",ffdoc_filename,":",ffver
f.close()
# warn the user about version mismatch
if strver != ffver:
print "\nWARNING: CGenFF versions are not equivalent!\n"
# in case something has gone horribly wrong
if (strver == 0) or (ffver == 0):
print "\nERROR: Could not detect CGenFF version. Exiting.\n"
exit()
#-----------------------------------------------------------------------
## jal
def is_lp(s):
if ((s[0]=='L') and (s[1]=='P')):
return True
return False
#-----------------------------------------------------------------------
## jal
def is_lp_host_atom(self,name):
for ai in range (0,self.nvsites):
if (name==self.G.node[ai]['at1']):
return True
return False
#-----------------------------------------------------------------------
## jal - only for COLINEAR lone pairs, since CGenFF only needs this now
def construct_lp(x1,y1,z1,x2,y2,z2,dist):
dx = x1-x2
dy = y1-y2
dz = z1-z2
dr = math.sqrt(dx*dx+dy*dy+dz*dz)
dr = dist/dr
# set LP coords
xlp = x1+dr*dx
ylp = y1+dr*dy
zlp = z1+dr*dz
return xlp,ylp,zlp
#-----------------------------------------------------------------------
## jal
def find_vsite(self, atnum):
for i in range (0, self.nvsites):
# if we find the LP host, find the LP atom index
if (self.G.node[i]['at1'] == self.G.node[atnum]['name']):
for j in range (0, self.natoms):
if (self.G.node[i]['vsite'] == self.G.node[j]['name']):
return j
#-----------------------------------------------------------------------
def read_gmx_atomtypes(filename):
atomtypes = []
f = open(filename, 'r')
for line in f.readlines():
if line.startswith(";"):
continue
if line == '\n':
continue
entry = re.split('\s+', string.lstrip(line))
var = [entry[0],entry[1]]
atomtypes.append(var)
f.close()
return atomtypes
#-----------------------------------------------------------------------
def get_filelist_from_gmx_forcefielditp(ffdir,ffparentfile):
filelist=[]
f = open(ffdir+"/"+ffparentfile, 'r')
for line in f.readlines():
if line.startswith("#include"):
entry = re.split('\s+', string.lstrip(line))
filename = ffdir + "/" + entry[1].replace("\"","")
filelist.append(filename)
return filelist
#-----------------------------------------------------------------------
def read_gmx_anglpars(filename):
angllines = []
f = open(filename, 'r')
section="NONE"
for line in f.readlines():
if line.startswith(";"):
continue
if line.startswith("\n"):
continue
if line.startswith("["):
section="NONE"
if(section=="ANGL"):
angllines.append(line)
if line.startswith("[ angletypes ]"):
section="ANGL"
anglpars = []
anglpar = {}
for line in angllines:
entry = re.split('\s+', string.lstrip(line))
ai, aj, ak, eq = entry[0],entry[1],entry[2],float(entry[4])
anglpars.append([ai,aj,ak,eq])
return anglpars
#-----------------------------------------------------------------------
def get_charmm_rtp_lines(filename,molname):
foundmol=0
store=0
rtplines=[]
f = open(filename, 'r')
section="NONE"
for line in f.readlines():
if(store==1) and line.startswith("RESI"):
store=0
if line.startswith("RESI"):
entry = re.split('\s+', string.lstrip(line))
rtfmolname=entry[1]
if(rtfmolname == molname):
store=1
if line.startswith("END"):
store=0
if(store==1):
rtplines.append(line)
return rtplines
#-----------------------------------------------------------------------
def get_charmm_prm_lines(filename):
foundmol=0
store=0
prmlines=[]
f = open(filename, 'r')
section="NONE"
for line in f.readlines():
if line.startswith("END"):
section="NONE"
store=0
if(store):
prmlines.append(line)
if line.startswith("read para"):
section="PRM"
store=1
return prmlines
#-----------------------------------------------------------------------
def parse_charmm_topology(rtplines):
topology = {}
noblanks = filter(lambda x: len(x.strip())>0, rtplines)
nocomments = filter(lambda x: x.strip()[0] not in ['*','!'], noblanks)
section = "BONDS" # default
state = "free"
for line in nocomments:
if state == "free":
if line.find("MASS") == 0:
if "ATOMS" not in topology.keys():
topology["ATOMS"] = {}
s = line.split()
idx,name,mass,type = int(s[1]),s[2],float(s[3]),s[4]
if line.find("!"):
comment = line[line.find("!")+1:].strip()
else:
comment = ""
topology["ATOMS"][name] = [idx,mass,type,comment]
elif line.find("DECL") == 0:
if "DECL" not in topology.keys():
topology["DECL"] = []
decl = line.split()[1]
topology["DECL"].append(decl)
elif line.find("DEFA") == 0:
topology["DEFA"] = line[4:]
elif line.find("AUTO") == 0:
topology["AUTO"] = line[4:]
elif line.find("RESI") == 0:
if "RESI" not in topology.keys():
topology["RESI"] = {}
state = "resi"
s = line.split()
resname, charge = s[1],float(s[2])
topology["RESI"][resname] = {}
topology["RESI"][resname]["charge"] = charge
topology["RESI"][resname]["cmaps"] = []
topology["RESI"][resname]["vsites"] = []
topology["RESI"][resname]["bonds"] = []
topology["RESI"][resname]["impropers"] = []
topology["RESI"][resname]["double_bonds"] = []
group = -1
elif line.find("PRES") == 0:
state = "pres"
s = line.split()
presname, charge = s[1],float(s[2])
elif line.find("END") == 0:
return topology
elif state == "resi":
if line.find("RESI")==0:
state = "resi"
s = line.split()
resname, charge = s[1],float(s[2])
topology["RESI"][resname] = {}
topology["RESI"][resname]["charge"] = charge
topology["RESI"][resname]["cmaps"] = []
topology["RESI"][resname]["vsites"] = []
topology["RESI"][resname]["bonds"] = []
topology["RESI"][resname]["impropers"] = []
topology["RESI"][resname]["double_bonds"] = []
#topology["RESI"][resname]["groups"] = []
group = -1
elif line.find("GROU")==0:
group += 1
topology["RESI"][resname][group] = []
elif line.find("ATOM")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
name,type,charge = s[1],s[2],float(s[3])
topology["RESI"][resname][group].append((name,type,charge))
## jal - adding lone pair support
elif line.find("LONE")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
name,at1,at2,dist = s[2],s[3],s[4],(float(s[6])*0.1)
topology["RESI"][resname]["vsites"].append((name,at1,at2,dist))
elif line.find("BOND")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
nbond = (len(s)-1)/2
for i in range(nbond):
p,q = s[1+2*i],s[2+2*i]
## jal - ignore "bonds" to lone pairs
if ((is_lp(p)==False) and (is_lp(q)==False)):
topology["RESI"][resname]["bonds"].append((p,q))
elif line.find("DOUB")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
ndouble = (len(s)-1)/2
for i in range(ndouble):
p,q = s[1+2*i],s[2+2*i]
topology["RESI"][resname]["double_bonds"].append((p,q))
elif line.find("IMPR")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
nimproper = (len(s)-1)/4
for i in range(nimproper):
impr = s[1+4*i],s[2+4*i],s[3+4*i],s[4+4*i]
topology["RESI"][resname]["impropers"].append(impr)
elif line.find("CMAP")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
#nimproper = (len(s)-1)/4
#for i in range(nimproper):
cmap = s[1:9]
topology["RESI"][resname]["cmaps"].append(cmap)
elif line.find("DONOR")==0:
continue # ignore for now
elif line.find("ACCEPTOR")==0:
continue
elif line.find("IC")==0:
continue
return topology
#-----------------------------------------------------------------------
def parse_charmm_parameters(prmlines):
parameters = {}
cmapkey = ()
noblanks = filter(lambda x: len(x.strip())>0, prmlines)
nocomments = filter(lambda x: x.strip()[0] not in ['*','!'], noblanks)
section = "ATOM" # default
for line in nocomments:
#print line
sectionkeys = [ "BOND", "ANGL", "DIHE", \
"IMPR", "CMAP", "NONB", "HBON", "NBFI" ]
key = line.split()[0]
#exit()
if key[0:4] in sectionkeys:
section = key[0:4]
continue
if section not in parameters.keys():
parameters[section] = []
#print line
if section == "BOND":
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
ai, aj, kij, rij = s[0],s[1],float(s[2]),float(s[3])
parameters["BOND"].append((ai,aj,kij,rij))
elif section == "ANGL":
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
ai, aj, ak = s[0],s[1],s[2]
other = map(float,s[3:])
parameters["ANGL"].append([ai,aj,ak]+other)
elif section == "DIHE":
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
ai, aj, ak, al, k, n, d = s[0],s[1],s[2],s[3],float(s[4]),int(s[5]),float(s[6])
parameters["DIHE"].append([ai,aj,ak,al,k,n,d])
elif section == "IMPR":
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
ai, aj, ak, al, k, d = s[0],s[1],s[2],s[3],float(s[4]),float(s[6])
parameters["IMPR"].append([ai,aj,ak,al,k,d])
elif section == "CMAP":
if line.find('!'):
line = line[:line.find('!')]
if cmapkey == ():
s = line.split()
a,b,c,d,e,f,g,h = s[0:8]
N = int(s[8])
cmapkey = (a,b,c,d,e,f,g,h,N)
cmaplist = []
else:
cmapdata = map(float,line.split())
cmaplist += cmapdata
if len(cmaplist) == N**2:
parameters["CMAP"].append([cmapkey,cmaplist])
cmapkey = ()
elif section == "NONB":
if line.find("cutnb")>=0 or line.find("wmin")>=0 or line.find("CUTNB")>=0 or line.find("WMIN")>=0 :
continue
bang = line.find('!')
if bang>0:
comment = line[bang+1:]
prm = line[:bang].split()
else:
comment = ""
prm = line.split()
atname = prm[0]
epsilon = -float(prm[2])
half_rmin = float(prm[3])
parameters["NONB"].append((atname,epsilon,half_rmin))
if len(prm)>4:
epsilon14 = -float(prm[5])
half_rmin14 = float(prm[6])
if "NONBONDED14" not in parameters.keys():
parameters["NONBONDED14"] = []
parameters["NONBONDED14"].append((atname,epsilon14,half_rmin14))
return parameters
#-----------------------------------------------------------------------
def write_gmx_bon(parameters,header_comments,filename):
kcal2kJ = 4.18400
outp = open(filename,"w")
outp.write("%s\n"%(header_comments))
outp.write("[ bondtypes ]\n")
kbond_conversion = 2.0*kcal2kJ/(0.1)**2 # [kcal/mol]/A**2 -> [kJ/mol]/nm**2
# factor of 0.5 because charmm bonds are Eb(r)=Kb*(r-r0)**2
rbond_conversion = .1 # A -> nm
outp.write(";%7s %8s %5s %12s %12s\n"%("i","j","func","b0","kb"))
if(parameters.has_key("BOND")):
for p in parameters["BOND"]:
ai,aj,kij,rij = p
rij *= rbond_conversion
kij *= kbond_conversion
outp.write("%8s %8s %5i %12.8f %12.2f\n"%(ai,aj,1,rij,kij))
kangle_conversion = 2.0*kcal2kJ # [kcal/mol]/rad**2 -> [kJ/mol]/rad**2
# factor of 0.5 because charmm angles are Ea(r)=Ka*(a-a0)**2
kub_conversion = 2.0*kcal2kJ/(0.1)**2 # [kcal/mol]/A**2 -> [kJ/mol]/nm**2prm
ub0_conversion = 0.1 # A -> nm
outp.write("\n\n[ angletypes ]\n")
outp.write(";%7s %8s %8s %5s %12s %12s %12s %12s\n"\
%("i","j","k","func","theta0","ktheta","ub0","kub"))
if(parameters.has_key("ANGL")):
for p in parameters["ANGL"]:
if len(p) == 5:
ai,aj,ak,kijk,theta = p
kub = 0.0
ub0 = 0.0
else:
ai,aj,ak,kijk,theta,kub,ub0 = p
kijk *= kangle_conversion
kub *= kub_conversion
ub0 *= ub0_conversion
outp.write("%8s %8s %8s %5i %12.6f %12.6f %12.8f %12.2f\n"\
%(ai,aj,ak,5,theta,kijk,ub0,kub))
kdihe_conversion = kcal2kJ
outp.write("\n\n[ dihedraltypes ]\n")
outp.write(";%7s %8s %8s %8s %5s %12s %12s %5s\n"\
%("i","j","k","l","func","phi0","kphi","mult"))
#parameters["DIHEDRALS"].sort(demote_wildcards)
if(parameters.has_key("DIHE")):
for p in parameters["DIHE"]:
ai,aj,ak,al,k,n,d = p
k *= kdihe_conversion
outp.write("%8s %8s %8s %8s %5i %12.6f %12.6f %5i\n"\
%(ai,aj,ak,al,9,d,k,n))
kimpr_conversion = kcal2kJ*2 # see above
outp.write("\n\n[ dihedraltypes ]\n")
outp.write("; 'improper' dihedrals \n")
outp.write(";%7s %8s %8s %8s %5s %12s %12s\n"\
%("i","j","k","l","func","phi0","kphi"))
if(parameters.has_key("IMPR")):
#parameters["IMPROPERS"].sort(demote_wildcards)
for p in parameters["IMPR"]:
ai,aj,ak,al,k,d = p
k *= kimpr_conversion
outp.write("%8s %8s %8s %8s %5i %12.6f %12.6f\n"\
%(ai,aj,ak,al,2,d,k))
outp.close()
return
#-----------------------------------------------------------------------
def write_gmx_mol_top(filename,ffdir,prmfile,itpfile,molname):
outp = open(filename,"w")
outp.write("#include \"%s/forcefield.itp\"\n" % (ffdir))
outp.write("\n")
outp.write("; additional params for the molecule\n")
outp.write("#include \"%s\"\n" % (prmfile))
outp.write("\n")
outp.write("#include \"%s\"\n" % (itpfile))
outp.write("\n")
outp.write("#include \"%s/tip3p.itp\"\n" % (ffdir))
outp.write("#ifdef POSRES_WATER\n")
outp.write("; Position restraint for each water oxygen\n")
outp.write("[ position_restraints ]\n")
outp.write("; i funct fcx fcy fcz\n")
outp.write(" 1 1 1000 1000 1000\n")
outp.write("#endif\n")
outp.write("\n")
outp.write("; Include topology for ions\n")
outp.write("#include \"%s/ions.itp\"\n" % (ffdir))
outp.write("\n")
outp.write("[ system ]\n")
outp.write("; Name\n")
outp.write("mol\n")
outp.write("\n")
outp.write("[ molecules ]\n")
outp.write("; Compound #mols\n")
outp.write("%s 1\n" % (molname))
outp.write("\n")
outp.close()
#=================================================================================================================
class atomgroup:
"""
A class that contains the data structures and functions to store and process
data related to groups of atoms (read molecules)
USAGE: m = atomgroup()
"""
def __init__(self):
self.G = nx.Graph()
self.name = ""
self.natoms = 0
self.nvsites = 0
self.nbonds = 0
self.angles = []
self.nangles = 0
self.dihedrals = []
self.ndihedrals = 0
self.impropers = []
self.nimpropers = 0
#self.coord=np.zeros((self.natoms,3),dtype=float)
#-----------------------------------------------------------------------
def read_charmm_rtp(self,rtplines,atomtypes):
"""
Reads CHARMM rtp
Reads atoms, bonds, impropers
Stores connectivity as a graph
Autogenerates angles and dihedrals
USAGE: m = atomgroup() ; m.read_charmm_rtp(rtplines,atomtypes)
"""
#initialize everything
self.G = nx.Graph()
self.name = ""
self.natoms = 0
self.nvsites = 0
self.nbonds = 0
self.angles = []
self.nangles = 0
self.dihedrals = []
self.ndihedrals = 0
self.impropers = []
self.nimpropers = 0
atm = {}
for line in rtplines:
if line.find('!'):
line = line[:line.find('!')]
if line.startswith("RESI"):
entry = re.split('\s+', string.lstrip(line))
self.name=entry[1]
if line.startswith("ATOM"):
entry = re.split('\s+', string.lstrip(line))
atm[self.natoms] = {'type':entry[2], 'resname':self.name, 'name':entry[1],
'charge':float(entry[3]),'mass':float(0.00), 'beta':float(0.0),
'x':float(9999.9999),'y':float(9999.9999),'z':float(9999.9999),'segid':self.name, 'resid':'1' }
for typei in atomtypes:
if(typei[0] == atm[self.natoms]['type']):
atm[self.natoms]['mass'] = float(typei[1])
break
self.G.add_node(self.natoms, atm[self.natoms])
self.natoms=self.natoms+1
## jal - adding lone pair support
if line.startswith("LONE"):
entry = re.split('\s+', string.rstrip(string.lstrip(line)))
atm[self.nvsites] = {'vsite':entry[2], 'at1':entry[3], 'at2':entry[4],
'dist':(float(entry[6])*0.1) }
# DEBUG
# print "Found lone pair in RTF: %s %s %s %.3f\n" % (atm[self.nvsites]['vsite'], atm[self.nvsites]['at1'], atm[self.nvsites]['at2'], atm[self.nvsites]['dist'])
self.G.add_node(self.nvsites, atm[self.nvsites])
self.nvsites=self.nvsites+1
if line.startswith("BOND") or line.startswith("DOUB"):
entry = re.split('\s+', string.rstrip(string.lstrip(line)))
numbonds = int((len(entry)-1)/2)
for bondi in range(0,numbonds):
found1 = False
found2 = False
for i in range(0,self.natoms):
if(atm[i]['name'] == entry[(bondi*2)+1]):
found1 = True
break
for j in range(0,self.natoms):
if(atm[j]['name'] == entry[(bondi*2)+2]):
found2 = True
break
if(not found1):
print "Error:atomgroup:read_charmm_rtp> Atomname not found in top",entry[(bondi*2)+1]
if(not found2):
print "Error:atomgroup:read_charmm_rtp> Atomname not found in top",entry[(bondi*2)+2]
## jal - ignore "bonds" to lone pairs
if ((is_lp(atm[i]['name'])==False) and (is_lp(atm[j]['name'])==False)):
self.G.add_edge(i,j)
self.G[i][j]['order']='1' # treat all bonds as single for now
self.nbonds=self.nbonds+1
if line.startswith("IMP"):
entry = re.split('\s+', string.lstrip(line))
numimpr = int((len(entry)-2)/4)
for impi in range(0,numimpr):
for i in range(0,self.natoms):
if(atm[i]['name'] == entry[(impi*4)+1]):
break
for j in range(0,self.natoms):
if(atm[j]['name'] == entry[(impi*4)+2]):
break
for k in range(0,self.natoms):
if(atm[k]['name'] == entry[(impi*4)+3]):
break
for l in range(0,self.natoms):
if(atm[l]['name'] == entry[(impi*4)+4]):
break
var = [i,j,k,l]
self.impropers.append(var)
self.nimpropers = len(self.impropers)
if(self.ndihedrals > 0 or self.nangles > 0):
print "WARNING:atomgroup:read_charmm_rtp> Autogenerating angl-dihe even though they are preexisting",self.nangles,self.ndihedrals
self.autogen_angl_dihe()
self.coord = np.zeros((self.natoms,3),dtype=float)
#-----------------------------------------------------------------------
def autogen_angl_dihe(self):
self.angles = []
for atomi in range(0,self.natoms):
nblist = []
for nb in self.G.neighbors(atomi):
nblist.append(nb)
for i in range(0,len(nblist)-1):
for j in range(i+1,len(nblist)):
var = [nblist[i],atomi,nblist[j]]
self.angles.append(var)
self.nangles = len(self.angles)
self.dihedrals = []
for i,j in self.G.edges_iter():
nblist1 = []
for nb in self.G.neighbors(i):
if(nb != j):
nblist1.append(nb)
nblist2 = []
for nb in self.G.neighbors(j):
if(nb != i):
nblist2.append(nb)
if(len(nblist1) > 0 and len(nblist2) > 0 ):
for ii in range(0,len(nblist1)):
for jj in range(0,len(nblist2)):
var = [nblist1[ii],i,j,nblist2[jj]]
if(var[0] != var[3]):
self.dihedrals.append(var)
self.ndihedrals = len(self.dihedrals)
#-----------------------------------------------------------------------
def get_nonplanar_dihedrals(self,angl_params):
nonplanar_dihedrals=[]
cutoff=179.9
for var in self.dihedrals:
d1=self.G.node[var[0]]['type']
d2=self.G.node[var[1]]['type']
d3=self.G.node[var[2]]['type']
d4=self.G.node[var[3]]['type']
keep=1
for angl_param in angl_params:
p1=angl_param[0]
p2=angl_param[1]
p3=angl_param[2]
eq=angl_param[3]
if( d2==p2 and ( ( d1==p1 and d3==p3) or (d1==p3 and d3==p1))):
if(eq > cutoff):
keep=-1
break
if( d3==p2 and ( (d2==p1 and d4==p3) or (d2==p3 and d4==p1))):
if(eq > cutoff):
keep=-1
break
if(keep==1):
nonplanar_dihedrals.append(var)
return nonplanar_dihedrals
#-----------------------------------------------------------------------
def write_gmx_itp(self,filename,angl_params):
f = open(filename, 'w')
f.write("; Created by cgenff_charmm2gmx.py\n")
f.write("\n")
f.write("[ moleculetype ]\n")
f.write("; Name nrexcl\n")
f.write("%s 3\n" % self.name)
f.write("\n")
f.write("[ atoms ]\n")
f.write("; nr type resnr residue atom cgnr charge mass typeB chargeB massB\n")
f.write("; residue 1 %s rtp %s q qsum\n" % (self.name,self.name))
pairs14 = nx.Graph()
for atomi in range(0,self.natoms):
pairs14.add_node(atomi)
f.write("%6d %10s %6s %6s %6s %6d %10.3f %10.3f ;\n" %
( atomi+1,self.G.node[atomi]['type'],
self.G.node[atomi]['resid'],self.name,self.G.node[atomi]['name'],atomi+1,
self.G.node[atomi]['charge'],self.G.node[atomi]['mass'] ) )
f.write("\n")
f.write("[ bonds ]\n")
f.write("; ai aj funct c0 c1 c2 c3\n")
for i,j in self.G.edges_iter():
f.write("%5d %5d 1\n" % (i+1,j+1) )
f.write("\n")
f.write("[ pairs ]\n")
f.write("; ai aj funct c0 c1 c2 c3\n")
for var in self.dihedrals:
if (len(nx.dijkstra_path(self.G,var[0],var[3])) == 4): #this is to remove 1-2 and 1-3 included in dihedrals of rings
pairs14.add_edge(var[0],var[3])
for i,j in pairs14.edges_iter():
f.write("%5d %5d 1\n" % (i+1,j+1) )
## jal - add LP pairs, same as parent atom
## Use is_lp_host_atom() to test each index, then find associated vsite
if ((is_lp_host_atom(self,self.G.node[i]['name'])==True)):
k = find_vsite(self, i)
f.write("%5d %5d 1\n" % (k+1,j+1) )
if ((is_lp_host_atom(self,self.G.node[j]['name'])==True)):
k = find_vsite(self, j)
f.write("%5d %5d 1\n" % (k+1,i+1) )
f.write("\n")
f.write("[ angles ]\n")
f.write("; ai aj ak funct c0 c1 c2 c3\n")
for var in self.angles:
f.write("%5d %5d %5d 5\n" % (var[0]+1,var[1]+1,var[2]+1) )
f.write("\n")
f.write("[ dihedrals ]\n")
f.write("; ai aj ak al funct c0 c1 c2 c3 c4 c5\n")
nonplanar_dihedrals=self.get_nonplanar_dihedrals(angl_params)
for var in nonplanar_dihedrals:
f.write("%5d %5d %5d %5d 9\n" % (var[0]+1,var[1]+1,var[2]+1,var[3]+1) )
f.write("\n")
if(self.nimpropers > 0):
f.write("[ dihedrals ]\n")
f.write("; ai aj ak al funct c0 c1 c2 c3\n")
for var in self.impropers:
f.write("%5d %5d %5d %5d 2\n" % (var[0]+1,var[1]+1,var[2]+1,var[3]+1) )
f.write("\n")
## jal - add vsite directive
## we use 3fd construction with a bit of a hack
## 1. we manually set the value of a = 0
## 2. constructing atoms j and k are intentionally the same
## 3. and the distance becomes negative to mean "outside the bond"
if (self.nvsites > 0):
func=2
a=0
f.write("[ virtual_sites3 ]\n")
f.write("; Site from funct a d\n")
for atomi in range (0,self.nvsites):
vsite = 0
at1 = 0
at2 = 0
# find atom name matches
for ai in range (0, self.natoms):
if (self.G.node[ai]['name'] == self.G.node[atomi]['vsite']):
vsite = ai
if (self.G.node[ai]['name'] == self.G.node[atomi]['at1']):
at1 = ai
if (self.G.node[ai]['name'] == self.G.node[atomi]['at2']):
at2 = ai
dist=self.G.node[atomi]['dist']*-1
f.write("%5d %5d %5d %5d %5d %5d %8.3f\n" % (vsite+1, at1+1, at2+1, at2+1, func, a, dist))
f.write("\n")
## jal - add exclusions for vsite
if (self.nvsites > 0):
f.write("[ exclusions ]\n")
f.write("; ai aj\n")
## jal - explicitly add all 1-2, 1-3, and 1-4 exclusions
## for the lone pair, which are the same as 1-2, 1-3, 1-4
## exclusions for the host (bonds, angles, pairs)
# first, exclude any LP from its host
for i in range (0, self.natoms):
if ((is_lp_host_atom(self,self.G.node[i]['name'])==True)):
# find the LP attached to this host, not necessarily consecutive
# in the topology
j = find_vsite(self, i)
f.write("%5d %5d 1\n" % (i+1,j+1) )
# first neighbors: 1-2
for i,j in self.G.edges_iter():
if ((is_lp_host_atom(self,self.G.node[i]['name'])==True)):
k = find_vsite(self, i)
f.write("%5d %5d 1\n" % (k+1,j+1) )
if ((is_lp_host_atom(self,self.G.node[j]['name'])==True)):
k = find_vsite(self, j)
f.write("%5d %5d 1\n" % (k+1,i+1) )
# second neighbors: 1-3
for var in self.angles:
# only need to consider ends of the angle, not middle atom
ai = var[0]
ak = var[2]
if ((is_lp_host_atom(self,self.G.node[ai]['name'])==True)):
l = find_vsite(self, ai)
f.write("%5d %5d 1\n" % (l+1,ak+1) )
if ((is_lp_host_atom(self,self.G.node[ak]['name'])==True)):
l = find_vsite(self, ak)
f.write("%5d %5d 1\n" % (l+1,ai+1) )
# third neighbors: 1-4
for i,j in pairs14.edges_iter():
if ((is_lp_host_atom(self,self.G.node[i]['name'])==True)):
k = find_vsite(self, i)
f.write("%5d %5d 1\n" % (k+1,j+1) )
if ((is_lp_host_atom(self,self.G.node[j]['name'])==True)):
k = find_vsite(self, j)
f.write("%5d %5d 1\n" % (k+1,i+1) )
f.write("\n")
f.close()
#-----------------------------------------------------------------------
def read_mol2_coor_only(self,filename):
check_natoms = 0
check_nbonds = 0
f = open(filename, 'r')
atm = {}
section="NONE"
for line in f.readlines():
secflag=False
if line.startswith("@"):
secflag=True
section="NONE"
if((section=="NATO") and (not secflag)):
entry = re.split('\s+', string.lstrip(line))
check_natoms=int(entry[0])
check_nbonds=int(entry[1])
if(check_natoms != self.natoms):
# jal - if there are lone pairs, these will not be in the mol2 file
if (self.nvsites == 0):
print "Error in atomgroup.py: read_mol2_coor_only: no. of atoms in mol2 (%d) and top (%d) are unequal" % (check_natoms, self.natoms)
print "Usually this means the specified residue name does not match between str and mol2 files"
#print check_natoms,self.natoms
exit()
else:
print ""
print "NOTE 4: %d lone pairs found in topology that are not in the mol2 file. This is not a problem, just FYI!\n" % (self.nvsites)
# jal - if we have correctly ignored bonds to LP then there is no need
# for any check here
if(check_nbonds != self.nbonds):
print "Error in atomgroup.py: read_mol2_coor_only: no. of bonds in mol2 (%d) and top (%d) are unequal" % (check_nbonds, self.nbonds)
#print check_nbonds,self.nbonds
exit()
section="NONE"
if((section=="MOLE") and (not secflag)):
self.name=line.strip()
section="NATO" #next line after @MOLECULE contains atom, bond numbers
if((section=="ATOM") and (not secflag)):
entry = re.split('\s+', string.lstrip(line))
## guard against blank lines
if (len(entry) > 1):
## jal - if there are lone pairs, these are not in mol2
## and are not necessarily something we can just tack on at the
## end of the coordinate section. Here, check the atom to see if it is
## the first constructing atom, and if so, we put in a dummy LP entry.
atomi = int(entry[0])-1
self.G.node[atomi]['x'] = float(entry[2])
self.G.node[atomi]['y'] = float(entry[3])
self.G.node[atomi]['z'] = float(entry[4])
self.coord[atomi][0] = float(entry[2])
self.coord[atomi][1] = float(entry[3])
self.coord[atomi][2] = float(entry[4])
## jal - if we have an atom that is the host for a LP, insert
## the LP into the list
if (is_lp_host_atom(self,self.G.node[atomi]['name'])):
atomj = find_vsite(self, atomi)
# insert dummy entry for LP
self.G.node[atomj]['x'] = float(9999.99)
self.G.node[atomj]['y'] = float(9999.99)
self.G.node[atomj]['z'] = float(9999.99)
self.coord[atomj][0] = float(9999.99)
self.coord[atomj][1] = float(9999.99)
self.coord[atomj][2] = float(9999.99)
if line.startswith("@MOLECULE"):
section="MOLE"
if line.startswith("@ATOM"):
section="ATOM"
if line.startswith("@BOND"):
section="BOND"
#-----------------------------------------------------------------------
def write_pdb(self,f):
for atomi in range(0,self.natoms):
if(len(self.G.node[atomi]['name']) > 4):
print "error in atomgroup.write_pdb(): atom name > 4 characters"
exit()
## jal - construct LP sites
if (is_lp(self.G.node[atomi]['name'])):
# DEBUG
# print "Found LP in write_pdb: %s\n" % self.G.node[atomi]['name']
# find constructing atoms, get their coordinates and construction distance*10
atn1 = "dum"
atn2 = "dum"
dist = 0
# loop over vsites
for ai in range (0,self.nvsites):
if (self.G.node[ai]['vsite'] == self.G.node[atomi]['name']):
atn1 = self.G.node[ai]['at1'] # atom name
atn2 = self.G.node[ai]['at2'] # atom name
dist = self.G.node[ai]['dist']*10 # Angstrom for PDB, was saved as *0.1 for GMX
# get atom indices
at1 = 0
at2 = 0
for ai in range (0, self.natoms):
if (self.G.node[ai]['name'] == atn1):
at1 = ai
if (self.G.node[ai]['name'] == atn2):
at2 = ai
# in case of failure
if ((at1==0) and (at2==0)):
print "Failed to match LP-constructing atoms in write_pdb!\n"
exit()
# DEBUG
# print "Found LP in write_pdb: %d %s %s with dist: %.3f\n" % ((atomi+1,at1+1,at2+1,dist))
# at1, at2, and dist only exist in vsite structure!
x1=self.coord[at1][0]
y1=self.coord[at1][1]
z1=self.coord[at1][2]
x2=self.coord[at2][0]
y2=self.coord[at2][1]
z2=self.coord[at2][2]
xlp,ylp,zlp = construct_lp(x1,y1,z1,x2,y2,z2,dist)
self.coord[atomi][0] = xlp
self.coord[atomi][1] = ylp
self.coord[atomi][2] = zlp
f.write("%-6s%5d %-4s %-4s%5s%12.3f%8.3f%8.3f%6.2f%6.2f\n" %
("ATOM",atomi+1,self.G.node[atomi]['name'],self.name,self.G.node[atomi]['resid'],self.coord[atomi][0],
self.coord[atomi][1],self.coord[atomi][2],1.0,self.G.node[atomi]['beta']))
f.write("END\n")
#=================================================================================================================
if(len(sys.argv) != 5):
print "Usage: RESNAME drug.mol2 drug.str charmm36.ff"
exit()
# check for compatible NetworkX version
if(float(nx.__version__) > 1.11):
print "Your NetworkX version is: ",nx.__version__
print "This script requires a version no higher than 1.11."
print "Your NetworkX package is incompatible with this conversion script and cannot be used."
exit()
if(sys.version_info > (3,0)):
print("You are using a Python version in the 3.x series. This script requires Python 2.x.")
print("Please visit http://mackerell.umaryland.edu/charmm_ff.shtml#gromacs to get a script for Python 3.x")
exit()
mol_name = sys.argv[1]
mol2_name = sys.argv[2]
rtp_name = sys.argv[3]
ffdir = sys.argv[4]
atomtypes_filename = ffdir + "/atomtypes.atp"
print "NOTE 1: Code tested with python 2.7.12. Your version:",sys.version
print ""
print "NOTE 2: Please be sure to use the same version of CGenFF in your simulations that was used during parameter generation:"
check_versions(rtp_name,ffdir + "/forcefield.doc")
print ""
print "NOTE 3: To avoid duplicated parameters, do NOT select the 'Include parameters that are already in CGenFF' option when uploading a molecule into CGenFF."
#for output
itpfile = mol_name.lower() + ".itp"
prmfile = mol_name.lower() + ".prm"
initpdbfile = mol_name.lower() + "_ini.pdb"
topfile = mol_name.lower() +".top"
atomtypes = read_gmx_atomtypes(atomtypes_filename)
angl_params = [] #needed for detecting triple bonds
filelist = get_filelist_from_gmx_forcefielditp(ffdir,"forcefield.itp")
for filename in filelist:
anglpars = read_gmx_anglpars(filename)
angl_params = angl_params + anglpars
m = atomgroup()
rtplines=get_charmm_rtp_lines(rtp_name,mol_name)
m.read_charmm_rtp(rtplines,atomtypes)
m.read_mol2_coor_only(mol2_name)
f = open(initpdbfile, 'w')
m.write_pdb(f)
f.close()
prmlines=get_charmm_prm_lines(rtp_name)
params = parse_charmm_parameters(prmlines)
write_gmx_bon(params,"",prmfile)
anglpars = read_gmx_anglpars(prmfile)
angl_params = angl_params + anglpars # append the new angl params
m.write_gmx_itp(itpfile,angl_params)
write_gmx_mol_top(topfile,ffdir,prmfile,itpfile,mol_name)
print "============ DONE ============"
print "Conversion complete."
print "The molecule topology has been written to %s" % (itpfile)
print "Additional parameters needed by the molecule are written to %s, which needs to be included in the system .top" % (prmfile)
print "\nPLEASE NOTE: lone pair construction requires duplicate host atom numbers, which will make grompp complain"
print "To produce .tpr files, the user MUST use -maxwarn 1 to circumvent this check"
print "============ DONE ============"
exit()