import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import shap
import matplotlib.pyplot as plt
# Load the dataset from CSV. Every column except the last is used as a
# feature; the last column is the regression target.
data = pd.read_csv('carbondatagyh.csv')
X = data.iloc[:, :-1]
Y = data.iloc[:, -1]

# Hold out 20% of the rows as a test set. The fixed seed keeps the split
# identical from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Create and train the random-forest regressor. It is seeded like the split:
# an unseeded forest is rebuilt differently on every run, which would make
# the SHAP importances below change between otherwise identical runs.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, Y_train)

# Explain the fitted model with SHAP's tree explainer, computing SHAP values
# for the training rows.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_train)

# Draw a bar-type SHAP summary plot of the features (this is a summary plot,
# not a waterfall plot). show=False stops summary_plot from displaying the
# figure itself, so the explicit plt.show() below is the single display point.
shap.summary_plot(shap_values, X_train, plot_type='bar', show=False)
plt.show()
# NOTE: this script requires a specific NumPy version. Install it with:
#     pip install numpy==1.23
评论区