import pandas as pd
# Load the descriptor tables for the two compound sets compared below.
# NOTE(review): later code reads the columns "LogP" and "Weight" from both
# tables and "Acceptors"/"Donors" from fda -- confirm the CSVs provide them.
fda = pd.read_csv("fda_result.csv")
# A bare `fda.describe()` is evaluated and thrown away when this file runs as
# a script, so print the summary statistics explicitly.
print(fda.describe())
chem = pd.read_csv("chem_result.csv")
print(chem.describe())
import matplotlib.pyplot as plt
import scipy.stats as stats
# Overlay the LogP distributions of both compound sets on one figure: for
# each set a normalised step histogram plus a Gaussian kernel density
# estimate, drawn in the same colour so the pair can be matched up.
fig = plt.figure(figsize=(10, 10))
for label, frame, colour in (("chem", chem, "C0"), ("fda", fda, "C1")):
    # gaussian_kde cannot work with NaN, so drop missing values first
    # (a no-op when the column is complete).
    logp = frame["LogP"].dropna()
    density = stats.gaussian_kde(logp)
    n, x, _ = plt.hist(
        logp,
        bins=50,
        histtype="step",
        density=True,
        color=colour,
        label=label + " histogram",
    )
    # x holds the histogram bin edges; the KDE is evaluated at those points.
    plt.plot(x, density(x), color=colour, linestyle="--", label=label + " KDE")
plt.xlabel("LogP")
plt.ylabel("Probability density")
plt.legend()
plt.show()
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.optimize import curve_fit
from scipy.stats import linregress
# For each compound set, show a 2-D histogram of Weight against LogP and
# report a least-squares linear fit of LogP on Weight.
for label, frame in (("fda", fda), ("chem", chem)):
    # Keep only rows where both values are present so the histogram and the
    # regression see exactly the same points.
    pairs = frame[["Weight", "LogP"]].dropna()

    plt.hist2d(pairs["Weight"], pairs["LogP"], bins=50)
    plt.colorbar(label="count")
    plt.xlabel("Weight")
    plt.ylabel("LogP")
    plt.title(label)
    plt.show()

    # linregress already returns the correlation coefficient, so R^2 is just
    # rvalue ** 2 -- no manual residual computation is needed.
    fit = linregress(pairs["Weight"], pairs["LogP"])
    print(
        "%s: slope=%.6g intercept=%.6g r^2=%.6g p=%.3g stderr=%.3g"
        % (
            label,
            fit.slope,
            fit.intercept,
            fit.rvalue ** 2,
            fit.pvalue,
            fit.stderr,
        )
    )
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
# 3-D scatter of the fda set: Weight, LogP and Acceptors on the axes, with
# the Donors value encoded as colour (the "fourth dimension").
fig = plt.figure(figsize=(15, 15))
ax = fig.add_subplot(111, projection="3d")
# Pass the colormap by name. plt.hot() returns None and only appeared to work
# because it switches the global default colormap as a side effect.
img = ax.scatter(
    fda["Weight"],
    fda["LogP"],
    fda["Acceptors"],
    c=fda["Donors"],
    cmap="hot",
)
ax.set_xlabel("Weight")
ax.set_ylabel("LogP")
ax.set_zlabel("Acceptors")
fig.colorbar(img, label="Donors")
plt.show()
# The black dots are the molecules for which the donor and acceptor counts
# (hdon and hacc) could not be calculated.
# This 4-D plot does not have a meaningful interpretation yet.