1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
| from sklearn.metrics import r2_score from sklearn import linear_model import matplotlib.pyplot as plt import numpy as np import pandas as pd from pylab import *
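'''
The input file is read with pandas into a DataFrame named df.
column is the column whose values are used to split the data into groups.
labels is the list of group values to analyse; for example, to study how
the adsorption energy relates to some variables for the separate cases "x"
and "Mg" found in the ads_ele column of the data file.
y_goal is the dependent variable to analyse, e.g. the adsorption energy.
x_goal is the independent variable to analyse.
'''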
def classify_scatter(filename, column, y_goal, x_goal, labels):
    """Plot ``y_goal`` against ``x_goal`` per category, with a linear fit each.

    The Excel workbook ``filename`` is read once with pandas. For every entry
    of ``labels`` the rows whose ``column`` value equals that label (compared
    as strings) are drawn as one scatter series; an ordinary least-squares
    line is fitted to them and drawn dashed, and the R^2 of that fit is put
    in the legend and printed. The finished figure is saved as
    ``"<y_goal> VS. <x_goal> sorted by <column>.pdf"`` in the current
    working directory.

    Args:
        filename: Path of the Excel file to read.
        column: Name of the column holding each row's category.
        y_goal: Name of the column used as the dependent variable.
        x_goal: Name of the column used as the independent variable.
        labels: Iterable of category values of ``column`` to plot.

    Raises:
        ValueError: If a label matches no row of ``column``.

    Note:
        Sets the global ``xtick.direction`` / ``ytick.direction`` rcParams to
        ``'in'`` and leaves the created figure open. Nothing is saved when
        ``labels`` is empty.
    """
    label_font = {'family': 'Arial', 'weight': 'normal', 'size': 26}
    legend_font = {'family': 'Arial', 'weight': 'normal', 'size': 14}
    border_width = 2

    # Tick direction is picked up when the axes are created, so set it first.
    plt.rcParams['xtick.direction'] = 'in'
    plt.rcParams['ytick.direction'] = 'in'

    plt.figure(figsize=(8.3, 6))
    plt.yticks(fontproperties='Arial', size=24)
    plt.xticks(fontproperties='Arial', size=20)
    plt.ylabel(y_goal, fontdict=label_font)
    plt.xlabel(x_goal, fontdict=label_font)
    plt.minorticks_on()
    plt.tick_params(which='major', width=2, length=6)
    plt.tick_params(which='minor', width=2, length=4)
    ax = plt.gca()
    for side in ('bottom', 'top', 'left', 'right'):
        ax.spines[side].set_linewidth(border_width)

    # Read the workbook once; it does not change between labels.
    df = pd.read_excel(filename)
    # Compare as strings so numeric category columns (whose labels arrive as
    # numbers) are matched too, not only columns that already hold strings.
    categories = df[column].astype(str)

    labels = list(labels)
    for label in labels:
        subset = df[categories == str(label)]
        if subset.empty:
            raise ValueError(
                f"no rows with {column!r} == {label!r} in {filename!r}"
            )
        y = subset[y_goal].to_numpy().reshape(-1, 1)
        x = subset[x_goal].to_numpy().reshape(-1, 1)

        linear = linear_model.LinearRegression()
        linear.fit(x, y)
        r2 = r2_score(y, linear.predict(x))

        plt.scatter(
            x, y,
            label=f"{label}:$R^2$={round(r2, 3)}",
            s=160, edgecolor='black', alpha=0.8,
        )

        # Extend the fitted line beyond the data on both sides.
        # NOTE(review): the upper limit scales x_max itself by 1.1 instead of
        # only adding a range-based margin (for a negative x_max it can end
        # left of the data). Kept as in the original - confirm it is intended.
        x_min, x_max = np.min(x), np.max(x)
        span = x_max - x_min
        x_axis = np.linspace(
            x_min - 0.1 * span,
            1.1 * x_max + 0.3 * span,
            10,
            endpoint=True,
        ).reshape(-1, 1)
        plt.plot(x_axis, linear.predict(x_axis), ls='dashed')

        print(f"{label}的R2:{r2}")

    if labels:
        # Build the legend and write the file once, after all series exist.
        plt.legend(loc='best', prop=legend_font, fancybox=False,
                   edgecolor='black')
        plt.tight_layout()
        plt.savefig(f"{y_goal} VS. {x_goal} sorted by {column}.pdf", dpi=300)
# Driver: for every grouping column, plot 'Ads_En' against each descriptor
# column, one scatter series per sufficiently populated category.
filename = 'raw.xlsx'

# A category is analysed only if it has more than this many rows.
MIN_ROWS_PER_LABEL = 4

# Read the workbook once; it is only used here to count category sizes.
df = pd.read_excel(filename)

y_analysis = 'Ads_En'
for column in ['Spacegroup', 'type', 'ads_ele']:
    temp = df[column].value_counts()
    labels = temp[temp > MIN_ROWS_PER_LABEL].index.tolist()

    for x_analysis in ['Bader', 'Workfunction', 'D -0']:
        classify_scatter(filename, column, y_analysis, x_analysis, labels)
|