In [1]:
import pandas as pd

# Read fda_result.csv into a DataFrame; later cells refer to it as `fda`.
fda = pd.read_csv("fda_result.csv")

# Final expression of the cell: per-column summary statistics
# (count, mean, std, min, quartiles, max) rendered as the cell output.
fda.describe()
Out[1]:
LogP Weight Acceptors Donors
count 2733.000000 2733.000000 2733.000000 2733.000000
mean 1.004747 353.307243 3.302964 1.457739
std 3.045381 173.535010 3.817158 2.490125
min -13.196100 33.020915 -1.000000 -1.000000
25% -0.574710 242.089174 0.000000 0.000000
50% 1.406220 330.149998 3.000000 1.000000
75% 2.858500 430.061041 5.000000 2.000000
max 12.605800 1663.492352 31.000000 18.000000
In [2]:
# Read chem_result.csv into a DataFrame; later cells refer to it as `chem`.
chem = pd.read_csv("chem_result.csv")

# Final expression of the cell: per-column summary statistics
# (count, mean, std, min, quartiles, max) rendered as the cell output.
chem.describe()
Out[2]:
LogP Weight Acceptors Donors
count 276048.000000 276048.000000 276048.000000 276048.000000
mean 1.765137 367.984002 1.715571 0.159787
std 1.517280 55.026487 2.804019 1.288923
min -6.022700 151.110399 -1.000000 -1.000000
25% 0.781820 333.170988 -1.000000 -1.000000
50% 1.844800 366.142941 2.000000 0.000000
75% 2.842400 400.225763 4.000000 1.000000
max 7.703800 598.269833 11.000000 5.000000
In [3]:
import matplotlib.pyplot as plt
import scipy.stats as stats
# Overlay the LogP distributions of the `chem` and `fda` tables on one set of
# axes. For each table a normalised step histogram (50 bins) is drawn together
# with a Gaussian kernel density estimate evaluated at the histogram bin edges.
fig, ax = plt.subplots(figsize=(10, 10))

for label, frame in (("chem", chem), ("fda", fda)):
    logp = frame["LogP"]
    kde = stats.gaussian_kde(logp)
    # ax.hist returns (bin heights, bin edges, patches); only the edges are
    # needed, as the x positions at which the KDE curve is sampled.
    _, edges, _ = ax.hist(
        logp, bins=50, histtype="step", density=True, label=f"{label} histogram"
    )
    ax.plot(edges, kde(edges), label=f"{label} KDE")

# Labels and a legend so the two data sets can be told apart when skimming.
ax.set_xlabel("LogP")
ax.set_ylabel("Probability density")
ax.set_title("LogP distribution: chem vs. fda")
ax.legend()
plt.show()
In [4]:
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.optimize import curve_fit
from scipy.stats import linregress
# 2-D histogram (50 x 50 bins) of Weight against LogP for the `fda` table,
# followed by a linear least-squares regression of LogP on Weight.
fig, ax = plt.subplots()
# hist2d returns (counts, x edges, y edges, image); the image is what the
# colorbar needs in order to show the count scale.
_, _, _, mesh = ax.hist2d(fda["Weight"], fda["LogP"], bins=50)
fig.colorbar(mesh, ax=ax, label="Count")
ax.set_xlabel("Weight")
ax.set_ylabel("LogP")
ax.set_title("fda: LogP vs. Weight")
plt.show()

# Final expression of the cell: the LinregressResult is rendered as the cell
# output. Its `rvalue` is the correlation coefficient, so R^2 is rvalue**2 and
# no separate residual-based computation is required.
linregress(fda["Weight"], fda["LogP"])
Out[4]:
LinregressResult(slope=0.0012299654702098398, intercept=0.5701911146609134, rvalue=0.07008714827006339, pvalue=0.0002455503658502025, stderr=0.00033498424262164915)
In [5]:
# 2-D histogram (50 x 50 bins) of Weight against LogP for the `chem` table,
# followed by a linear least-squares regression of LogP on Weight.
fig, ax = plt.subplots()
# hist2d returns (counts, x edges, y edges, image); the image is what the
# colorbar needs in order to show the count scale.
_, _, _, mesh = ax.hist2d(chem["Weight"], chem["LogP"], bins=50)
fig.colorbar(mesh, ax=ax, label="Count")
ax.set_xlabel("Weight")
ax.set_ylabel("LogP")
ax.set_title("chem: LogP vs. Weight")
plt.show()

# Final expression of the cell: the LinregressResult is rendered as the cell
# output (slope, intercept, rvalue, pvalue, stderr).
linregress(chem["Weight"], chem["LogP"])
Out[5]:
LinregressResult(slope=0.008813250072827879, intercept=-1.4779984830302235, rvalue=0.31962595047854164, pvalue=0.0, stderr=4.9728186952339225e-05)
In [24]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np

# 3-D scatter of the `fda` table: Weight (x), LogP (y) and Acceptors (z), with
# the Donors column mapped to colour as a fourth dimension.
fig = plt.figure(figsize=(15, 15))
ax = fig.add_subplot(111, projection="3d")

# The colormap is passed by name. plt.hot() returns None and switches the
# global default colormap as a side effect, so cmap=plt.hot() only produced
# the intended colours by accident and leaked state into every later plot.
img = ax.scatter(
    fda["Weight"], fda["LogP"], fda["Acceptors"], c=fda["Donors"], cmap="hot"
)

# Axis and colorbar labels so each of the four plotted columns is identifiable.
ax.set_xlabel("Weight")
ax.set_ylabel("LogP")
ax.set_zlabel("Acceptors")
fig.colorbar(img, label="Donors")
plt.show()
In [ ]:
# The black dots are the molecules for which hdon and hacc (presumably the Donors and Acceptors columns) could not be calculated.
# This 4-D plot does not yet have a meaningful interpretation.