#!/usr/bin/env python3
# USAGE: python cgenff_charmm2gmx.py DRUG drug.mol2 drug.str charmm36.ff
# Tested with Python 3.5.2. Requires numpy and networkx
# The networkx version MUST be in the 1.x series. Tested version: 1.11
# Copyright (C) 2014 E. Prabhu Raman prabhu@outerbanks.umaryland.edu
#
# Modified 11/6/2018 by Justin Lemkul to add lone pair support
# needed for CGenFF >= 4.0 halogens
#
# Modified 01/10/2019 by Conrard Tetsassi to work with Networkx 2.3
# Included notes on bonds, angles, and dihedrals
#
# For help/questions/bug reports, please contact Justin Lemkul jalemkul@vt.edu
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# EXAMPLE: You have a drug-like molecule in drug.mol2 file
# ParamChem returns a CHARMM stream file drug.str with topology and parameters
# INPUT
# The program needs four inputs:
# (1) The first argument (resname) is found in the RESI entry in the CHARMM stream file; for example
# RESI DRUG 0.000 ! here DRUG is the resname
# (2) drug.mol2 is the .mol2 which you supplied to ParamChem
# (3) drug.str contains the topology entry and CGenFF parameters that ParamChem generates for you
# (4) charmm36.ff should contain the CHARMM force-field in GROMACS format
# Download it from: http://mackerell.umaryland.edu/CHARMM_ff_params.html
# OUTPUT
# The program will generate 4 output files ("DRUG" is converted to lowercase and the files are named accordingly):
# (1) drug.itp - contains GROMACS itp
# (2) drug.prm - contains parameters obtained from drug.str which are converted to GROMACS format and units
# (3) drug.top - A Gromacs topology file which incorporates (1) and (2)
# (4) drug_ini.pdb - Coordinates of the molecule obtained from drug.mol2
# The program has been tested only on CHARMM stream files containing topology and parameters of a single molecule.
import string
import re
import sys
import os
import math
import numpy as np
import networkx as nx
#=================================================================================================================
def check_versions(str_filename,ffdoc_filename):
ffver = 0 # CGenFF version in force field directory
strver = 0 # CGenFF version in stream file
f = open(str_filename, 'r')
for line in f.readlines():
if line.startswith("* For use with CGenFF version"):
entry = re.split('\s+', line.lstrip())
strver = entry[6]
print("--Version of CGenFF detected in ",str_filename,":",strver)
f.close()
f = open(ffdoc_filename, 'r')
for line in f.readlines():
if line.startswith("Parameters taken from CHARMM36 and CGenFF"):
entry = re.split('\s+', line.lstrip())
ffver = entry[6]
print("--Version of CGenFF detected in ",ffdoc_filename,":",ffver)
f.close()
# warn the user about version mismatch
if strver != ffver:
print("\nWARNING: CGenFF versions are not equivalent!\n")
# in case something has gone horribly wrong
if (strver == 0) or (ffver == 0):
print("\nERROR: Could not detect CGenFF version. Exiting.\n")
exit()
#-----------------------------------------------------------------------
## jal
def is_lp(s):
if ((s[0]=='L') and (s[1]=='P')):
return True
return False
#-----------------------------------------------------------------------
## jal
def is_lp_host_atom(self,name):
for ai in range (0,self.nvsites):
if (name==self.G.node[ai]['at1']):
return True
return False
#-----------------------------------------------------------------------
## jal - only for COLINEAR lone pairs, since CGenFF only needs this now
def construct_lp(x1,y1,z1,x2,y2,z2,dist):
dx = x1-x2
dy = y1-y2
dz = z1-z2
dr = math.sqrt(dx*dx+dy*dy+dz*dz)
dr = dist/dr
# set LP coords
xlp = x1+dr*dx
ylp = y1+dr*dy
zlp = z1+dr*dz
return xlp,ylp,zlp
#-----------------------------------------------------------------------
## jal
def find_vsite(self, atnum):
for i in range (0, self.nvsites):
# if we find the LP host, find the LP atom index
if (self.G.node[i]['at1'] == self.G.node[atnum]['name']):
for j in range (0, self.natoms):
if (self.G.node[i]['vsite'] == self.G.node[j]['name']):
return j
#-----------------------------------------------------------------------
def read_gmx_atomtypes(filename):
atomtypes = []
f = open(filename, 'r')
for line in f.readlines():
if line.startswith(";"):
continue
if line == '\n':
continue
entry = re.split('\s+', line.lstrip())
var = [entry[0],entry[1]]
atomtypes.append(var)
f.close()
return atomtypes
#-----------------------------------------------------------------------
def get_filelist_from_gmx_forcefielditp(ffdir,ffparentfile):
filelist=[]
f = open(ffdir+"/"+ffparentfile, 'r')
for line in f.readlines():
if line.startswith("#include"):
entry = re.split('\s+', line.lstrip())
filename = ffdir + "/" + entry[1].replace("\"","")
filelist.append(filename)
return filelist
#-----------------------------------------------------------------------
def read_gmx_anglpars(filename):
angllines = []
f = open(filename, 'r')
section="NONE"
for line in f.readlines():
if line.startswith(";"):
continue
if line.startswith("\n"):
continue
if line.startswith("["):
section="NONE"
if(section=="ANGL"):
angllines.append(line)
if line.startswith("[ angletypes ]"):
section="ANGL"
anglpars = []
anglpar = {}
for line in angllines:
entry = re.split('\s+', line.lstrip())
ai, aj, ak, eq = entry[0],entry[1],entry[2],float(entry[4])
anglpars.append([ai,aj,ak,eq])
return anglpars
#-----------------------------------------------------------------------
def get_charmm_rtp_lines(filename,molname):
foundmol=0
store=0
rtplines=[]
f = open(filename, 'r')
section="NONE"
for line in f.readlines():
if(store==1) and line.startswith("RESI"):
store=0
if line.startswith("RESI"):
entry = re.split('\s+', line.lstrip())
rtfmolname=entry[1]
if(rtfmolname == molname):
store=1
if line.startswith("END"):
store=0
if(store==1):
rtplines.append(line)
return rtplines
#-----------------------------------------------------------------------
def get_charmm_prm_lines(filename):
foundmol=0
store=0
prmlines=[]
f = open(filename, 'r')
section="NONE"
for line in f.readlines():
if line.startswith("END"):
section="NONE"
store=0
if(store):
prmlines.append(line)
if line.startswith("read para"):
section="PRM"
store=1
return prmlines
#-----------------------------------------------------------------------
def parse_charmm_topology(rtplines):
topology = {}
noblanks = [x for x in rtplines if len(x.strip())>0]
nocomments = [x for x in noblanks if x.strip()[0] not in ['*','!']]
section = "BONDS" # default
state = "free"
for line in nocomments:
if state == "free":
if line.find("MASS") == 0:
if "ATOMS" not in list(topology.keys()):
topology["ATOMS"] = {}
s = line.split()
idx,name,mass,type = int(s[1]),s[2],float(s[3]),s[4]
if line.find("!"):
comment = line[line.find("!")+1:].strip()
else:
comment = ""
topology["ATOMS"][name] = [idx,mass,type,comment]
elif line.find("DECL") == 0:
if "DECL" not in list(topology.keys()):
topology["DECL"] = []
decl = line.split()[1]
topology["DECL"].append(decl)
elif line.find("DEFA") == 0:
topology["DEFA"] = line[4:]
elif line.find("AUTO") == 0:
topology["AUTO"] = line[4:]
elif line.find("RESI") == 0:
if "RESI" not in list(topology.keys()):
topology["RESI"] = {}
state = "resi"
s = line.split()
resname, charge = s[1],float(s[2])
topology["RESI"][resname] = {}
topology["RESI"][resname]["charge"] = charge
topology["RESI"][resname]["cmaps"] = []
topology["RESI"][resname]["vsites"] = []
topology["RESI"][resname]["bonds"] = []
topology["RESI"][resname]["impropers"] = []
topology["RESI"][resname]["double_bonds"] = []
group = -1
elif line.find("PRES") == 0:
state = "pres"
s = line.split()
presname, charge = s[1],float(s[2])
elif line.find("END") == 0:
return topology
elif state == "resi":
if line.find("RESI")==0:
state = "resi"
s = line.split()
resname, charge = s[1],float(s[2])
topology["RESI"][resname] = {}
topology["RESI"][resname]["charge"] = charge
topology["RESI"][resname]["cmaps"] = []
topology["RESI"][resname]["vsites"] = []
topology["RESI"][resname]["bonds"] = []
topology["RESI"][resname]["impropers"] = []
topology["RESI"][resname]["double_bonds"] = []
#topology["RESI"][resname]["groups"] = []
group = -1
elif line.find("GROU")==0:
group += 1
topology["RESI"][resname][group] = []
elif line.find("ATOM")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
name,type,charge = s[1],s[2],float(s[3])
topology["RESI"][resname][group].append((name,type,charge))
## jal - adding lone pair support
elif line.find("LONE")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
name,at1,at2,dist = s[2],s[3],s[4],(float(s[6])*0.1)
topology["RESI"][resname]["vsites"].append((name,at1,at2,dist))
elif line.find("BOND")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
nbond = (len(s)-1)/2
for i in range(nbond):
p,q = s[1+2*i],s[2+2*i]
## jal - ignore "bonds" to lone pairs
if ((is_lp(p)==False) and (is_lp(q)==False)):
topology["RESI"][resname]["bonds"].append((p,q))
elif line.find("DOUB")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
ndouble = (len(s)-1)/2
for i in range(ndouble):
p,q = s[1+2*i],s[2+2*i]
topology["RESI"][resname]["double_bonds"].append((p,q))
elif line.find("IMPR")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
nimproper = (len(s)-1)/4
for i in range(nimproper):
impr = s[1+4*i],s[2+4*i],s[3+4*i],s[4+4*i]
topology["RESI"][resname]["impropers"].append(impr)
elif line.find("CMAP")==0:
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
#nimproper = (len(s)-1)/4
#for i in range(nimproper):
cmap = s[1:9]
topology["RESI"][resname]["cmaps"].append(cmap)
elif line.find("DONOR")==0:
continue # ignore for now
elif line.find("ACCEPTOR")==0:
continue
elif line.find("IC")==0:
continue
return topology
#-----------------------------------------------------------------------
def parse_charmm_parameters(prmlines):
parameters = {}
cmapkey = ()
noblanks = [x for x in prmlines if len(x.strip())>0]
nocomments = [x for x in noblanks if x.strip()[0] not in ['*','!']]
section = "ATOM" # default
for line in nocomments:
#print line
sectionkeys = [ "BOND", "ANGL", "DIHE", \
"IMPR", "CMAP", "NONB", "HBON", "NBFI" ]
key = line.split()[0]
#exit()
if key[0:4] in sectionkeys:
section = key[0:4]
continue
if section not in list(parameters.keys()):
parameters[section] = []
#print line
if section == "BOND":
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
ai, aj, kij, rij = s[0],s[1],float(s[2]),float(s[3])
parameters["BOND"].append((ai,aj,kij,rij))
elif section == "ANGL":
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
ai, aj, ak = s[0],s[1],s[2]
other = list(map(float,s[3:]))
parameters["ANGL"].append([ai,aj,ak]+other)
elif section == "DIHE":
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
ai, aj, ak, al, k, n, d = s[0],s[1],s[2],s[3],float(s[4]),int(s[5]),float(s[6])
parameters["DIHE"].append([ai,aj,ak,al,k,n,d])
elif section == "IMPR":
if line.find('!'):
line = line[:line.find('!')]
s = line.split()
ai, aj, ak, al, k, d = s[0],s[1],s[2],s[3],float(s[4]),float(s[6])
parameters["IMPR"].append([ai,aj,ak,al,k,d])
elif section == "CMAP":
if line.find('!'):
line = line[:line.find('!')]
if cmapkey == ():
s = line.split()
a,b,c,d,e,f,g,h = s[0:8]
N = int(s[8])
cmapkey = (a,b,c,d,e,f,g,h,N)
cmaplist = []
else:
cmapdata = list(map(float,line.split()))
cmaplist += cmapdata
if len(cmaplist) == N**2:
parameters["CMAP"].append([cmapkey,cmaplist])
cmapkey = ()
elif section == "NONB":
if line.find("cutnb")>=0 or line.find("wmin")>=0 or line.find("CUTNB")>=0 or line.find("WMIN")>=0 :
continue
bang = line.find('!')
if bang>0:
comment = line[bang+1:]
prm = line[:bang].split()
else:
comment = ""
prm = line.split()
atname = prm[0]
epsilon = -float(prm[2])
half_rmin = float(prm[3])
parameters["NONB"].append((atname,epsilon,half_rmin))
if len(prm)>4:
epsilon14 = -float(prm[5])
half_rmin14 = float(prm[6])
if "NONBONDED14" not in list(parameters.keys()):
parameters["NONBONDED14"] = []
parameters["NONBONDED14"].append((atname,epsilon14,half_rmin14))
return parameters
#-----------------------------------------------------------------------
def write_gmx_bon(parameters,header_comments,filename):
kcal2kJ = 4.18400
outp = open(filename,"w")
outp.write("%s\n"%(header_comments))
outp.write("[ bondtypes ]\n")
kbond_conversion = 2.0*kcal2kJ/(0.1)**2 # [kcal/mol]/A**2 -> [kJ/mol]/nm**2
# factor of 0.5 because charmm bonds are Eb(r)=Kb*(r-r0)**2
rbond_conversion = .1 # A -> nm
outp.write(";%7s %8s %5s %12s %12s\n"%("i","j","func","b0","kb"))
if("BOND" in parameters):
for p in parameters["BOND"]:
ai,aj,kij,rij = p
rij *= rbond_conversion
kij *= kbond_conversion
outp.write("%8s %8s %5i %12.8f %12.2f\n"%(ai,aj,1,rij,kij))
kangle_conversion = 2.0*kcal2kJ # [kcal/mol]/rad**2 -> [kJ/mol]/rad**2
# factor of 0.5 because charmm angles are Ea(r)=Ka*(a-a0)**2
kub_conversion = 2.0*kcal2kJ/(0.1)**2 # [kcal/mol]/A**2 -> [kJ/mol]/nm**2prm
ub0_conversion = 0.1 # A -> nm
outp.write("\n\n[ angletypes ]\n")
outp.write(";%7s %8s %8s %5s %12s %12s %12s %12s\n"\
%("i","j","k","func","theta0","ktheta","ub0","kub"))
if("ANGL" in parameters):
for p in parameters["ANGL"]:
if len(p) == 5:
ai,aj,ak,kijk,theta = p
kub = 0.0
ub0 = 0.0
else:
ai,aj,ak,kijk,theta,kub,ub0 = p
kijk *= kangle_conversion
kub *= kub_conversion
ub0 *= ub0_conversion
outp.write("%8s %8s %8s %5i %12.6f %12.6f %12.8f %12.2f\n"\
%(ai,aj,ak,5,theta,kijk,ub0,kub))
kdihe_conversion = kcal2kJ
outp.write("\n\n[ dihedraltypes ]\n")
outp.write(";%7s %8s %8s %8s %5s %12s %12s %5s\n"\
%("i","j","k","l","func","phi0","kphi","mult"))
#parameters["DIHEDRALS"].sort(demote_wildcards)
if("DIHE" in parameters):
for p in parameters["DIHE"]:
ai,aj,ak,al,k,n,d = p
k *= kdihe_conversion
outp.write("%8s %8s %8s %8s %5i %12.6f %12.6f %5i\n"\
%(ai,aj,ak,al,9,d,k,n))
kimpr_conversion = kcal2kJ*2 # see above
outp.write("\n\n[ dihedraltypes ]\n")
outp.write("; 'improper' dihedrals \n")
outp.write(";%7s %8s %8s %8s %5s %12s %12s\n"\
%("i","j","k","l","func","phi0","kphi"))
if("IMPR" in parameters):
#parameters["IMPROPERS"].sort(demote_wildcards)
for p in parameters["IMPR"]:
ai,aj,ak,al,k,d = p
k *= kimpr_conversion
outp.write("%8s %8s %8s %8s %5i %12.6f %12.6f\n"\
%(ai,aj,ak,al,2,d,k))
outp.close()
return
#-----------------------------------------------------------------------
def write_gmx_mol_top(filename,ffdir,prmfile,itpfile,molname):
outp = open(filename,"w")
outp.write("#include \"%s/forcefield.itp\"\n" % (ffdir))
outp.write("\n")
outp.write("; additional params for the molecule\n")
outp.write("#include \"%s\"\n" % (prmfile))
outp.write("\n")
outp.write("#include \"%s\"\n" % (itpfile))
outp.write("\n")
outp.write("#include \"%s/tip3p.itp\"\n" % (ffdir))
outp.write("#ifdef POSRES_WATER\n")
outp.write("; Position restraint for each water oxygen\n")
outp.write("[ position_restraints ]\n")
outp.write("; i funct fcx fcy fcz\n")
outp.write(" 1 1 1000 1000 1000\n")
outp.write("#endif\n")
outp.write("\n")
outp.write("; Include topology for ions\n")
outp.write("#include \"%s/ions.itp\"\n" % (ffdir))
outp.write("\n")
outp.write("[ system ]\n")
outp.write("; Name\n")
outp.write("mol\n")
outp.write("\n")
outp.write("[ molecules ]\n")
outp.write("; Compound #mols\n")
outp.write("%s 1\n" % (molname))
outp.write("\n")
outp.close()
#=================================================================================================================
class atomgroup:
"""
A class that contains the data structures and functions to store and process
data related to groups of atoms (read molecules)
USAGE: m = atomgroup()
"""
def __init__(self):
self.G = nx.Graph()
self.name = ""
self.natoms = 0
self.nvsites = 0
self.nbonds = 0
self.angles = []
self.nangles = 0
self.dihedrals = []
self.ndihedrals = 0
self.impropers = []
self.nimpropers = 0
#self.coord=np.zeros((self.natoms,3),dtype=float)
#-----------------------------------------------------------------------
def read_charmm_rtp(self,rtplines,atomtypes):
"""
Reads CHARMM rtp
Reads atoms, bonds, impropers
Stores connectivity as a graph
Autogenerates angles and dihedrals
USAGE: m = atomgroup() ; m.read_charmm_rtp(rtplines,atomtypes)
"""
#initialize everything
self.G = nx.Graph()
self.name = ""
self.natoms = 0
self.nvsites = 0
self.nbonds = 0
self.angles = []
self.nangles = 0
self.dihedrals = []
self.ndihedrals = 0
self.impropers = []
self.nimpropers = 0
atm = {}
for line in rtplines:
if line.find('!'):
line = line[:line.find('!')]
if line.startswith("RESI"):
entry = re.split('\s+', line.lstrip())
self.name=entry[1]
if line.startswith("ATOM"):
entry = re.split('\s+', line.lstrip())
atm[self.natoms] = {'type':entry[2], 'resname':self.name, 'name':entry[1],
'charge':float(entry[3]),'mass':float(0.00), 'beta':float(0.0),
'x':float(9999.9999),'y':float(9999.9999),'z':float(9999.9999),'segid':self.name, 'resid':'1' }
for typei in atomtypes:
if(typei[0] == atm[self.natoms]['type']):
atm[self.natoms]['mass'] = float(typei[1])
break
#self.G.add_node(self.natoms, atm[self.natoms])
#@@@@@@@@@@@@@@ Conrard modification @@@@@@@@@@@
att = self.natoms # attribute
nodd = {att:atm[self.natoms]} # node to add
self.G.add_node(att) #instanciate the attribute
nx.set_node_attributes(self.G, nodd)
self.natoms=self.natoms+1
## jal - adding lone pair support
if line.startswith("LONE"):
#entry = re.split('\s+', line.rstrip(line.lstrip()))
entry = re.split('\s+', line.lstrip())
atm[self.nvsites] = {'vsite':entry[2], 'at1':entry[3], 'at2':entry[4],
'dist':(float(entry[6])*0.1),
'x':float(9999.9999),'y':float(9999.9999),'z':float(9999.9999) }
# DEBUG
# print "Found lone pair in RTF: %s %s %s %.3f\n" % (atm[self.nvsites]['vsite'], atm[self.nvsites]['at1'], atm[self.nvsites]['at2'], atm[self.nvsites]['dist'])
# jal - as above
att = self.nvsites
nodd = {att:atm[self.nvsites]}
self.G.add_node(att)
nx.set_node_attributes(self.G, nodd)
self.nvsites=self.nvsites+1
if line.startswith("BOND") or line.startswith("DOUB"):
#entry = re.split('\s+', line.rstrip(line.lstrip()))
entry = re.split('\s+', line.lstrip())
numbonds = int((len(entry)-1)/2)
for bondi in range(0,numbonds):
found1 = False
found2 = False
for i in range(0,self.natoms):
if(atm[i]['name'] == entry[(bondi*2)+1]):
found1 = True
break
for j in range(0,self.natoms):
if(atm[j]['name'] == entry[(bondi*2)+2]):
found2 = True
break
if(not found1):
print("Error:atomgroup:read_charmm_rtp> Atomname not found in top",entry[(bondi*2)+1])
if(not found2):
print("Error:atomgroup:read_charmm_rtp> Atomname not found in top",entry[(bondi*2)+2])
## jal - ignore "bonds" to lone pairs
if ((is_lp(atm[i]['name'])==False) and (is_lp(atm[j]['name'])==False)):
self.G.add_edge(i,j)
self.G[i][j]['order']='1' # treat all bonds as single for now
self.nbonds=self.nbonds+1
if line.startswith("IMP"):
entry = re.split('\s+', line.lstrip())
numimpr = int((len(entry)-2)/4)
for impi in range(0,numimpr):
for i in range(0,self.natoms):
if(atm[i]['name'] == entry[(impi*4)+1]):
break
for j in range(0,self.natoms):
if(atm[j]['name'] == entry[(impi*4)+2]):
break
for k in range(0,self.natoms):
if(atm[k]['name'] == entry[(impi*4)+3]):
break
for l in range(0,self.natoms):
if(atm[l]['name'] == entry[(impi*4)+4]):
break
var = [i,j,k,l]
self.impropers.append(var)
self.nimpropers = len(self.impropers)
if(self.ndihedrals > 0 or self.nangles > 0):
print("WARNING:atomgroup:read_charmm_rtp> Autogenerating angl-dihe even though they are preexisting",self.nangles,self.ndihedrals)
self.autogen_angl_dihe()
self.coord = np.zeros((self.natoms,3),dtype=float)
#-----------------------------------------------------------------------
def autogen_angl_dihe(self):
self.angles = []
for atomi in range(0,self.natoms):
nblist = []
for nb in self.G.neighbors(atomi):
nblist.append(nb)
for i in range(0,len(nblist)-1):
for j in range(i+1,len(nblist)):
var = [nblist[i],atomi,nblist[j]]
self.angles.append(var)
self.nangles = len(self.angles)
self.dihedrals = []
for i,j in self.G.edges():
nblist1 = []
for nb in self.G.neighbors(i):
if(nb != j):
nblist1.append(nb)
nblist2 = []
for nb in self.G.neighbors(j):
if(nb != i):
nblist2.append(nb)
if(len(nblist1) > 0 and len(nblist2) > 0 ):
for ii in range(0,len(nblist1)):
for jj in range(0,len(nblist2)):
var = [nblist1[ii],i,j,nblist2[jj]]
if(var[0] != var[3]):
self.dihedrals.append(var)
self.ndihedrals = len(self.dihedrals)
#-----------------------------------------------------------------------
def get_nonplanar_dihedrals(self,angl_params):
nonplanar_dihedrals=[]
cutoff=179.9
for var in self.dihedrals:
d1=self.G.node[var[0]]['type']
d2=self.G.node[var[1]]['type']
d3=self.G.node[var[2]]['type']
d4=self.G.node[var[3]]['type']
keep=1
for angl_param in angl_params:
p1=angl_param[0]
p2=angl_param[1]
p3=angl_param[2]
eq=angl_param[3]
if( d2==p2 and ( ( d1==p1 and d3==p3) or (d1==p3 and d3==p1))):
if(eq > cutoff):
keep=-1
break
if( d3==p2 and ( (d2==p1 and d4==p3) or (d2==p3 and d4==p1))):
if(eq > cutoff):
keep=-1
break
if(keep==1):
nonplanar_dihedrals.append(var)
return nonplanar_dihedrals
#-----------------------------------------------------------------------
def write_gmx_itp(self,filename,angl_params):
f = open(filename, 'w')
f.write("; Created by cgenff_charmm2gmx.py\n")
f.write("\n")
f.write("[ moleculetype ]\n")
f.write("; Name nrexcl\n")
f.write("%s 3\n" % self.name)
f.write("\n")
f.write("[ atoms ]\n")
f.write("; nr type resnr residue atom cgnr charge mass typeB chargeB massB\n")
f.write("; residue 1 %s rtp %s q qsum\n" % (self.name,self.name))
pairs14 = nx.Graph()
for atomi in range(0,self.natoms):
pairs14.add_node(atomi)
f.write("%6d %10s %6s %6s %6s %6d %10.3f %10.3f ;\n" %
( atomi+1,self.G.node[atomi]['type'],
self.G.node[atomi]['resid'],self.name,self.G.node[atomi]['name'],atomi+1,
self.G.node[atomi]['charge'],self.G.node[atomi]['mass'] ) )
f.write("\n")
f.write("[ bonds ]\n")
f.write("; ai aj funct c0 c1 c2 c3\n")
for i,j in self.G.edges():
#f.write("%5d %5d 1\n" % (i+1,j+1) )
f.write("%5d %5d 1 ; %10s %10s\n" % (i+1,j+1,self.G.node[i]['type'],self.G.node[j]['type']) )
f.write("\n")
f.write("[ pairs ]\n")
f.write("; ai aj funct c0 c1 c2 c3\n")
for var in self.dihedrals:
if (len(nx.dijkstra_path(self.G,var[0],var[3])) == 4): #this is to remove 1-2 and 1-3 included in dihedrals of rings
pairs14.add_edge(var[0],var[3])
for i,j in pairs14.edges():
f.write("%5d %5d 1\n" % (i+1,j+1) )
#f.write("%5d %5d 1 ; %10s %10s\n" % (i+1,j+1,self.G.node[i]['type'],self.G.node[j]['type']) )
## jal - add LP pairs, same as parent atom
## Use is_lp_host_atom() to test each index, then find associated vsite
if ((is_lp_host_atom(self,self.G.node[i]['name'])==True)):
k = find_vsite(self, i)
f.write("%5d %5d 1\n" % (k+1,j+1) )
if ((is_lp_host_atom(self,self.G.node[j]['name'])==True)):
k = find_vsite(self, j)
f.write("%5d %5d 1\n" % (k+1,i+1) )
f.write("\n")
f.write("[ angles ]\n")
f.write("; ai aj ak funct c0 c1 c2 c3\n")
for var in self.angles:
f.write("%5d %5d %5d 5 ; %10s %10s %10s\n" % (var[0]+1,var[1]+1,var[2]+1,\
self.G.node[var[0]]['type'],self.G.node[var[1]]['type'],self.G.node[var[2]]['type']) )
f.write("\n")
f.write("[ dihedrals ]\n")
f.write("; ai aj ak al funct c0 c1 c2 c3 c4 c5\n")
nonplanar_dihedrals=self.get_nonplanar_dihedrals(angl_params)
for var in nonplanar_dihedrals:
f.write("%5d %5d %5d %5d 9 ; %10s %10s %10s %10s\n" % (var[0]+1,var[1]+1,var[2]+1,var[3]+1,\
self.G.node[var[0]]['type'],self.G.node[var[1]]['type'],self.G.node[var[2]]['type'],self.G.node[var[3]]['type']) )
f.write("\n")
if(self.nimpropers > 0):
f.write("[ dihedrals ]\n")
f.write("; ai aj ak al funct c0 c1 c2 c3\n")
for var in self.impropers:
f.write("%5d %5d %5d %5d 2\n" % (var[0]+1,var[1]+1,var[2]+1,var[3]+1) )
f.write("\n")
## jal - add vsite directive
## we use 2fd construction, introduced in GROMACS-2020
if (self.nvsites > 0):
func=2
f.write("[ virtual_sites2 ]\n")
f.write("; Site from funct a\n")
for atomi in range (0,self.nvsites):
vsite = 0
at1 = 0
at2 = 0
# find atom name matches
for ai in range (0, self.natoms):
if (self.G.node[ai]['name'] == self.G.node[atomi]['vsite']):
vsite = ai
if (self.G.node[ai]['name'] == self.G.node[atomi]['at1']):
at1 = ai
if (self.G.node[ai]['name'] == self.G.node[atomi]['at2']):
at2 = ai
dist=self.G.node[atomi]['dist']*-1 # invert sign for GROMACS convention
f.write("%5d %5d %5d %5d %8.3f\n" % (vsite+1, at1+1, at2+1, func, dist))
f.write("\n")
## jal - add exclusions for vsite
if (self.nvsites > 0):
f.write("[ exclusions ]\n")
f.write("; ai aj\n")
## jal - explicitly add all 1-2, 1-3, and 1-4 exclusions
## for the lone pair, which are the same as 1-2, 1-3, 1-4
## exclusions for the host (bonds, angles, pairs)
# first, exclude any LP from its host
for i in range (0, self.natoms):
if ((is_lp_host_atom(self,self.G.node[i]['name'])==True)):
# find the LP attached to this host, not necessarily consecutive
# in the topology
j = find_vsite(self, i)
f.write("%5d %5d 1\n" % (i+1,j+1) )
# first neighbors: 1-2
for i,j in self.G.edges():
if ((is_lp_host_atom(self,self.G.node[i]['name'])==True)):
k = find_vsite(self, i)
f.write("%5d %5d 1\n" % (k+1,j+1) )
if ((is_lp_host_atom(self,self.G.node[j]['name'])==True)):
k = find_vsite(self, j)
f.write("%5d %5d 1\n" % (k+1,i+1) )
# second neighbors: 1-3
for var in self.angles:
# only need to consider ends of the angle, not middle atom
ai = var[0]
ak = var[2]
if ((is_lp_host_atom(self,self.G.node[ai]['name'])==True)):
l = find_vsite(self, ai)
f.write("%5d %5d 1\n" % (l+1,ak+1) )
if ((is_lp_host_atom(self,self.G.node[ak]['name'])==True)):
l = find_vsite(self, ak)
f.write("%5d %5d 1\n" % (l+1,ai+1) )
# third neighbors: 1-4
for i,j in pairs14.edges():
if ((is_lp_host_atom(self,self.G.node[i]['name'])==True)):
k = find_vsite(self, i)
f.write("%5d %5d 1\n" % (k+1,j+1) )
if ((is_lp_host_atom(self,self.G.node[j]['name'])==True)):
k = find_vsite(self, j)
f.write("%5d %5d 1\n" % (k+1,i+1) )
f.write("\n")
f.close()
#-----------------------------------------------------------------------
def read_mol2_coor_only(self,filename):
check_natoms = 0
check_nbonds = 0
f = open(filename, 'r')
atm = {}
section="NONE"
for line in f.readlines():
secflag=False
if line.startswith("@"):
secflag=True
section="NONE"
if((section=="NATO") and (not secflag)):
entry = re.split('\s+', line.lstrip())
check_natoms=int(entry[0])
check_nbonds=int(entry[1])
if(check_natoms != self.natoms):
# jal - if there are lone pairs, these will not be in the mol2 file
if (self.nvsites == 0):
print("Error in atomgroup.py: read_mol2_coor_only: no. of atoms in mol2 (%d) and top (%d) are unequal" % (check_natoms, self.natoms))
print("Usually this means the specified residue name does not match between str and mol2 files")
#print check_natoms,self.natoms
exit()
else:
print("")
print("NOTE 5: %d lone pairs found in topology that are not in the mol2 file. This is not a problem, just FYI!\n" % (self.nvsites))
# jal - if we have correctly ignored bonds to LP then there is no need
# for any check here
if(check_nbonds != self.nbonds):
print("Error in atomgroup.py: read_mol2_coor_only: no. of bonds in mol2 (%d) and top (%d) are unequal" % (check_nbonds, self.nbonds))
#print check_nbonds,self.nbonds
exit()
section="NONE"
if((section=="MOLE") and (not secflag)):
self.name=line.strip()
section="NATO" #next line after @MOLECULE contains atom, bond numbers
if((section=="ATOM") and (not secflag)):
entry = re.split('\s+', line.lstrip())
## guard against blank lines
if (len(entry) > 1):
## jal - if there are lone pairs, these are not in mol2
## and are not necessarily something we can just tack on at the
## end of the coordinate section. Here, check the atom to see if it is
## the first constructing atom, and if so, we put in a dummy LP entry.
atomi = int(entry[0])-1
self.G.node[atomi]['x'] = float(entry[2])
self.G.node[atomi]['y'] = float(entry[3])
self.G.node[atomi]['z'] = float(entry[4])
self.coord[atomi][0] = float(entry[2])
self.coord[atomi][1] = float(entry[3])
self.coord[atomi][2] = float(entry[4])
## jal - if we have an atom that is the host for a LP, insert
## the LP into the list
if (is_lp_host_atom(self,self.G.node[atomi]['name'])):
atomj = find_vsite(self, atomi)
# insert dummy entry for LP
self.G.node[atomj]['x'] = float(9999.99)
self.G.node[atomj]['y'] = float(9999.99)
self.G.node[atomj]['z'] = float(9999.99)
self.coord[atomj][0] = float(9999.99)
self.coord[atomj][1] = float(9999.99)
self.coord[atomj][2] = float(9999.99)
if line.startswith("@MOLECULE"):
section="MOLE"
if line.startswith("@ATOM"):
section="ATOM"
if line.startswith("@BOND"):
section="BOND"
#-----------------------------------------------------------------------
def write_pdb(self,f):
for atomi in range(0,self.natoms):
if(len(self.G.node[atomi]['name']) > 4):
print("error in atomgroup.write_pdb(): atom name > 4 characters")
exit()
## jal - construct LP sites
if (is_lp(self.G.node[atomi]['name'])):
# DEBUG
# print "Found LP in write_pdb: %s\n" % self.G.node[atomi]['name']
# find constructing atoms, get their coordinates and construction distance*10
atn1 = "dum"
atn2 = "dum"
dist = 0
# loop over vsites
for ai in range (0,self.nvsites):
if (self.G.node[ai]['vsite'] == self.G.node[atomi]['name']):
atn1 = self.G.node[ai]['at1'] # atom name
atn2 = self.G.node[ai]['at2'] # atom name
dist = self.G.node[ai]['dist']*10 # Angstrom for PDB, was saved as *0.1 for GMX
# get atom indices
at1 = 0
at2 = 0
for ai in range (0, self.natoms):
if (self.G.node[ai]['name'] == atn1):
at1 = ai
if (self.G.node[ai]['name'] == atn2):
at2 = ai
# in case of failure
if ((at1==0) and (at2==0)):
print("Failed to match LP-constructing atoms in write_pdb!\n")
exit()
# DEBUG
# print "Found LP in write_pdb: %d %s %s with dist: %.3f\n" % ((atomi+1,at1+1,at2+1,dist))
# at1, at2, and dist only exist in vsite structure!
x1=self.coord[at1][0]
y1=self.coord[at1][1]
z1=self.coord[at1][2]
x2=self.coord[at2][0]
y2=self.coord[at2][1]
z2=self.coord[at2][2]
xlp,ylp,zlp = construct_lp(x1,y1,z1,x2,y2,z2,dist)
self.coord[atomi][0] = xlp
self.coord[atomi][1] = ylp
self.coord[atomi][2] = zlp
f.write("%-6s%5d %-4s %-4s%5s%12.3f%8.3f%8.3f%6.2f%6.2f\n" %
("ATOM",atomi+1,self.G.node[atomi]['name'],self.name,self.G.node[atomi]['resid'],self.coord[atomi][0],
self.coord[atomi][1],self.coord[atomi][2],1.0,self.G.node[atomi]['beta']))
f.write("END\n")
#=================================================================================================================
if(len(sys.argv) != 5):
print("Usage: RESNAME drug.mol2 drug.str charmm36.ff")
exit()
#check for compatible NetworkX version
if(float(nx.__version__) < 2.0):
print("Your NetworkX version is: ",nx.__version__)
print("This script requires a version in the 2.x series")
print("Your NetworkX package is incompatible with this conversion script and cannot be used.")
exit()
else:
if(float(nx.__version__) > 2.3):
print("This script has been tested with NetworkX 2.3, and 2.4 is buggy.")
print("Please install version 2.3 for best performance:")
print("pip uninstall networkx")
print("pip install networkx==2.3")
if(sys.version_info < (3,0)):
print("You are using a Python version in the 2.x series. This script requires Python 3.0 or higher.")
print("Please visit http://mackerell.umaryland.edu/charmm_ff.shtml#gromacs to get a script for Python 3.x")
exit()
mol_name = sys.argv[1]
mol2_name = sys.argv[2]
rtp_name = sys.argv[3]
ffdir = sys.argv[4]
atomtypes_filename = ffdir + "/atomtypes.atp"
print("NOTE 1: Code tested with Python 3.5.2 and 3.7.3. Your version:",sys.version)
print("")
print("NOTE 2: Code tested with NetworkX 2.3. Your version:",nx.__version__)
print("")
print("NOTE 3: Please be sure to use the same version of CGenFF in your simulations that was used during parameter generation:")
check_versions(rtp_name,ffdir + "/forcefield.doc")
print("")
print("NOTE 4: To avoid duplicated parameters, do NOT select the 'Include parameters that are already in CGenFF' option when uploading a molecule into CGenFF.")
#for output
itpfile = mol_name.lower() + ".itp"
prmfile = mol_name.lower() + ".prm"
initpdbfile = mol_name.lower() + "_ini.pdb"
topfile = mol_name.lower() +".top"
atomtypes = read_gmx_atomtypes(atomtypes_filename)
angl_params = [] #needed for detecting triple bonds
filelist = get_filelist_from_gmx_forcefielditp(ffdir,"forcefield.itp")
for filename in filelist:
anglpars = read_gmx_anglpars(filename)
angl_params = angl_params + anglpars
m = atomgroup()
rtplines=get_charmm_rtp_lines(rtp_name,mol_name)
m.read_charmm_rtp(rtplines,atomtypes)
m.read_mol2_coor_only(mol2_name)
f = open(initpdbfile, 'w')
m.write_pdb(f)
f.close()
prmlines=get_charmm_prm_lines(rtp_name)
params = parse_charmm_parameters(prmlines)
write_gmx_bon(params,"",prmfile)
anglpars = read_gmx_anglpars(prmfile)
angl_params = angl_params + anglpars # append the new angl params
m.write_gmx_itp(itpfile,angl_params)
write_gmx_mol_top(topfile,ffdir,prmfile,itpfile,mol_name)
print("============ DONE ============")
print("Conversion complete.")
print("The molecule topology has been written to %s" % (itpfile))
print("Additional parameters needed by the molecule are written to %s, which needs to be included in the system .top" % (prmfile))
print("\nPLEASE NOTE: If your topology has lone pairs, you must use GROMACS version 2020 or newer to use 2fd construction")
print("Older GROMACS versions WILL NOT WORK as they do not support 2fd virtual site construction\n")
print("============ DONE ============")
exit()