使用matplotlib绘制Raincloud图/云雨图/箱型图/小提琴图
需求: 使用Python的matplotlib绘制数据分布、数据箱型图、数据散点图
参考: https://blog.csdn.net/weixin_39559994/article/details/128197965?fromshare=blogdetail&sharetype=blogdetail&sharerId=128197965&sharerefer=PC&sharesource=captain_keating&sharefrom=from_link
输入数据格式: df为dataframe格式,样例如下
输出效果:
代码/作图函数:
def _raincloud_statistics(raw_columns, names):
    """Build the per-column summary table returned by ``plot_raincloud``.

    Args:
        raw_columns: Sequence of 1-D float arrays, one per plotted column,
            containing the un-jittered, NaN-free values.
        names: Display names, one per entry of ``raw_columns``.

    Returns:
        A ``pandas.DataFrame`` with one row per column and the columns
        Feature, Mean, Variance, StdDev, Min, Max, the 25th/50th/75th
        percentiles and SEM.
    """
    rows = []
    for name, values in zip(names, raw_columns):
        # np.std / np.var use ddof=0 (population formulas), as the original did.
        std_dev = np.std(values)
        rows.append({
            'Feature': name,
            'Mean': np.mean(values),
            'Variance': np.var(values),
            'StdDev': std_dev,
            'Min': np.min(values),
            'Max': np.max(values),
            '25th Percentile': np.percentile(values, 25),
            '50th Percentile (Median)': np.percentile(values, 50),
            '75th Percentile': np.percentile(values, 75),
            # Standard error of the mean: std / sqrt(sample count).
            'SEM': std_dev / np.sqrt(len(values)),
        })
    return pd.DataFrame(rows, columns=[
        'Feature', 'Mean', 'Variance', 'StdDev', 'Min', 'Max',
        '25th Percentile', '50th Percentile (Median)', '75th Percentile',
        'SEM',
    ])


def plot_raincloud(data=None, column_names=None, ylim=(-20, 1100)):
    """Draw a raincloud plot (half violin + jittered scatter + box plot).

    One group is drawn per column of the input frame. NaN values are dropped
    per column before plotting and before computing statistics.

    Args:
        data: DataFrame whose columns are the groups to plot. When omitted,
            the module-level variable ``df`` is used, which keeps the
            original zero-argument call ``plot_raincloud()`` working.
        column_names: Labels for the x ticks and the ``Feature`` column of
            the result, one per column of ``data``. When omitted, the
            module-level ``new_column_names`` is used if it exists,
            otherwise the frame's own column names.
        ylim: ``(bottom, top)`` limits of the y axis. Pass them in descending
            order, e.g. ``(1100, -20)``, to get an inverted axis.

    Returns:
        A ``pandas.DataFrame`` of summary statistics, one row per column,
        computed from the raw (un-jittered) values.

    Raises:
        ValueError: If no data is available, if ``column_names`` does not
            have one entry per column, or if a column has no non-NaN values.
    """
    if data is None:
        data = globals().get('df')
        if data is None:
            raise ValueError(
                "no data: pass a DataFrame or define a module-level 'df'")
    if column_names is None:
        column_names = globals().get('new_column_names')
        if column_names is None:
            column_names = [str(col) for col in data.columns]
    column_names = list(column_names)
    n_groups = len(data.columns)
    if len(column_names) != n_groups:
        raise ValueError(
            f"expected {n_groups} column names, got {len(column_names)}")

    raw_columns = [data[col].dropna().values.astype(float)
                   for col in data.columns]
    for name, values in zip(column_names, raw_columns):
        if values.size == 0:
            raise ValueError(f"column {name!r} has no non-NaN values to plot")

    # Statistics come from the raw values. The original computed them after
    # adding random jitter, which perturbed every reported number and made
    # the result differ from run to run.
    statistics_df = _raincloud_statistics(raw_columns, column_names)

    # NOTE: these rcParams assignments change matplotlib's global state.
    plt.rcParams['font.sans-serif'] = 'SimHei'  # CJK-capable fallback font
    plt.rcParams['font.family'] = 'Arial'
    plt.rcParams['font.size'] = '14'
    plt.rcParams['xtick.direction'] = 'out'  # ticks point outward
    plt.rcParams['ytick.direction'] = 'out'
    colors = ['tomato', 'darksalmon', 'deepskyblue', 'mediumseagreen',
              'orange', '#bce27f', '#aab8d8']

    # Small vertical jitter applied to the plotted values only.
    jittered = [
        values + np.random.uniform(low=-0.1, high=0.1, size=len(values))
        for values in raw_columns
    ]

    fig = plt.figure(figsize=(15, 15), dpi=300)
    ax = fig.add_axes([0.1, 0.61, 0.42, 0.3])

    # Half violins ("cloud"); vertical orientation is matplotlib's default.
    violins = ax.violinplot(jittered, points=500, widths=1.1,
                            showmeans=False, showextrema=False,
                            showmedians=False)
    for idx, body in enumerate(violins['bodies']):
        # Cycle the palette so more than len(colors) columns do not fail.
        body.set_color(colors[idx % len(colors)])
        # Clip x to [idx + 1, idx + 2] so only the right half is drawn.
        vertices = body.get_paths()[0].vertices
        vertices[:, 0] = np.clip(vertices[:, 0], idx + 1, idx + 2)

    # Jittered scatter ("rain"), drawn just left of each half violin.
    for idx, y_vals in enumerate(jittered):
        x = np.full(len(y_vals), idx + 0.8)
        # low/high control the horizontal spread of the points.
        x += np.random.uniform(low=-0.15, high=0.15, size=len(x))
        ax.scatter(x, y_vals, s=2, c=colors[idx % len(colors)], alpha=0.5)

    # Box plots share the scatter's x position. An integer arange plus an
    # offset always yields exactly n_groups positions, which a float-bounded
    # arange does not guarantee.
    box_positions = np.arange(n_groups) + 0.8
    ax.boxplot(jittered, patch_artist=True, positions=box_positions,
               widths=0.2, showfliers=False,
               boxprops=dict(facecolor='none', color='black'),
               medianprops=dict(color='black'),
               whiskerprops=dict(color='black'),
               capprops=dict(color='black'))

    # The original called ax.invert_yaxis() and then `ylim.reverse()` on an
    # undefined name (NameError). The ascending set_ylim that followed undid
    # the inversion anyway, so only the explicit limits are kept.
    ax.set_ylim(ylim)
    ax.set_xticks(np.arange(1, n_groups + 1))
    ax.set_xticklabels(column_names, rotation=60,
                       fontdict={"family": "SimSun", "size": 20})
    ax.set_ylabel('y轴标题', fontdict={"family": "SimSun", "size": 20})
    ax.grid(True, linestyle='--', alpha=0.5)
    return statistics_df
# Call the function; it returns the summary-statistics DataFrame.
df1 = plot_raincloud()
备注:
new_column_names
这个变量里装的是列名的列表,封装函数时忘了作为参数加进去,大家使用时自己定义一下即可;它用于设置x轴的刻度标签(以及确定箱型图的个数)。