import pandas as pd
# Load the first descriptor table from CSV and compute its summary
# statistics. The bare expression is kept as-is: its value is only shown when
# this runs as the last statement of an interactive/notebook cell.
fda = pd.read_csv("fda_result.csv")
fda.describe()

# Same for the second descriptor table.
chem = pd.read_csv("chem_result.csv")
chem.describe()

import matplotlib.pyplot as plt
import scipy.stats as stats
# Overlay the LogP distributions of both data sets on a single figure: for
# each set, a normalised step histogram plus a Gaussian kernel density
# estimate drawn over it. `chem` is plotted first, then `fda`.
fig = plt.figure(figsize=(10, 10))
for frame in (chem, fda):
    logp = frame["LogP"]
    kde = stats.gaussian_kde(logp)
    # plt.hist returns (heights, bin edges, patches); only the bin edges are
    # needed here, as the sample points at which the KDE curve is evaluated.
    _, edges, _ = plt.hist(logp, bins=50, histtype="step", density=True)
    plt.plot(edges, kde(edges))
plt.show()

import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.optimize import curve_fit
from scipy.stats import linregress
# 2-D histogram (50 bins per axis) of Weight against LogP for the `fda`
# table, followed by an ordinary least-squares fit of LogP on Weight.
fda_weight = fda["Weight"]
fda_logp = fda["LogP"]
plt.hist2d(fda_weight, fda_logp, bins=50)
plt.show()
# Left as a bare expression: presumably this is the tail of a notebook cell,
# where the returned regression result is echoed as the cell output.
linregress(fda_weight, fda_logp)
#fit=np.polyfit(chem["Weight"],chem["LogP"],1)
#f=np.poly1d(fit)
#f
#popt, pcov = curve_fit(f, chem["Weight"], chem["LogP"])
#residuals = ydata- f(chem["Weight"], popt)
#ss_res = numpy.sum(residuals**2)
#ss_tot = numpy.sum((ydata-numpy.mean(ydata))**2)
#r_squared = 1 - (ss_res / ss_tot)

# Recorded output of the linregress(fda["Weight"], fda["LogP"]) call above:
# LinregressResult(slope=0.0012299654702098398, intercept=0.5701911146609134, rvalue=0.07008714827006339, pvalue=0.0002455503658502025, stderr=0.00033498424262164915)

# 2-D histogram (50 bins per axis) of Weight against LogP for the `chem`
# table, followed by an ordinary least-squares fit of LogP on Weight.
chem_weight = chem["Weight"]
chem_logp = chem["LogP"]
plt.hist2d(chem_weight, chem_logp, bins=50)
plt.show()
# Left as a bare expression: presumably this is the tail of a notebook cell,
# where the returned regression result is echoed as the cell output.
linregress(chem_weight, chem_logp)

# Recorded output of the linregress(chem["Weight"], chem["LogP"]) call above:
# LinregressResult(slope=0.008813250072827879, intercept=-1.4779984830302235, rvalue=0.31962595047854164, pvalue=0.0, stderr=4.9728186952339225e-05)

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np

# "4-D" view of the `fda` table: Weight, LogP and Acceptors on the three
# spatial axes, with Donors encoded as the marker colour.
fig = plt.figure(figsize=(15, 15))
ax = fig.add_subplot(111, projection='3d')

# The colormap is passed by name. The previous `cmap=plt.hot()` handed
# `None` to scatter (plt.hot() returns nothing) and only produced the "hot"
# colours because that call switches the global default colormap as a side
# effect, which would also leak into any figure drawn afterwards.
img = ax.scatter(
    fda["Weight"],
    fda["LogP"],
    fda["Acceptors"],
    c=fda["Donors"],
    cmap="hot",
)

# Label every plotted dimension so the figure can be read on its own.
ax.set_xlabel("Weight")
ax.set_ylabel("LogP")
ax.set_zlabel("Acceptors")
colorbar = fig.colorbar(img)
colorbar.set_label("Donors")
plt.show()

# The black dots are the molecules for which the hydrogen-bond donor (hdon)
# and acceptor (hacc) counts could not be calculated.
# This 4-D plot is not meaningful in its current form.

# Recorded describe() output (tab-separated); judging by the row count and
# the order of the calls, this appears to be fda.describe() -- confirm.
# 	LogP	Weight	Acceptors	Donors
# count	2733.000000	2733.000000	2733.000000	2733.000000
# mean	1.004747	353.307243	3.302964	1.457739
# std	3.045381	173.535010	3.817158	2.490125
# min	-13.196100	33.020915	-1.000000	-1.000000
# 25%	-0.574710	242.089174	0.000000	0.000000
# 50%	1.406220	330.149998	3.000000	1.000000
# 75%	2.858500	430.061041	5.000000	2.000000
# max	12.605800	1663.492352	31.000000	18.000000

# Recorded describe() output (tab-separated); judging by the row count and
# the order of the calls, this appears to be chem.describe() -- confirm.
# 	LogP	Weight	Acceptors	Donors
# count	276048.000000	276048.000000	276048.000000	276048.000000
# mean	1.765137	367.984002	1.715571	0.159787
# std	1.517280	55.026487	2.804019	1.288923
# min	-6.022700	151.110399	-1.000000	-1.000000
# 25%	0.781820	333.170988	-1.000000	-1.000000
# 50%	1.844800	366.142941	2.000000	0.000000
# 75%	2.842400	400.225763	4.000000	1.000000
# max	7.703800	598.269833	11.000000	5.000000