基于 Python 的实现:居民用电量数据分析与可视化
基于 Python 的实现:居民用电量数据分析与可视化
本文将介绍如何利用 Python 技术栈(包括 pymysql、pandas、matplotlib 等库)对居民用电量数据进行分析和可视化,以帮助我们更好地理解用电行为模式。
数据准备
在MySQL数据库中创建数据,,数据库表结构如下:
date
:记录日期resident
:居民标识consumption
:当日用电量(千瓦时)
-- 创建表语句..
CREATE TABLE `daily_electricity` (`id` int NOT NULL AUTO_INCREMENT,`date` date NOT NULL,`resident` varchar(50) NOT NULL,`consumption` int NOT NULL,PRIMARY KEY (`id`),UNIQUE KEY `unique_record` (`date`,`resident`)
) ENGINE=InnoDB AUTO_INCREMENT=601 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;-- 插入数据语句..INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (1, '2025-02-27', '居民1', 26);
-- 更多数据记录...
读取数据
通过 pymysql 库连接数据库并获取数据:
import pymysqldef fetch_data_from_db():"""从数据库中读取用电量数据"""connection = pymysql.connect(**DB_CONFIG)data = []try:with connection.cursor() as cursor:cursor.execute("SELECT date, resident, consumption FROM daily_electricity ORDER BY date, resident")data = cursor.fetchall()finally:connection.close()return data
数据处理
将获取的数据转换为 pandas DataFrame 格式,便于后续分析:
import pandas as pddef convert_to_dataframe(data):"""将数据转换为 DataFrame 格式"""df = pd.DataFrame(data, columns=['date', 'resident', 'consumption'])df['date'] = pd.to_datetime(df['date'])return df
数据可视化
1. 折线图:居民用电量趋势
通过折线图可以直观观察每个居民在不同日期的用电量变化趋势:
def plot_trends(df):plt.figure(figsize=(12, 6))for resident, group in df.groupby('resident'):plt.plot(group['date'], group['consumption'], marker='o', linestyle='-', label=resident)plt.title('居民每日用电量趋势')plt.xlabel('日期')plt.ylabel('用电量 (千瓦时)')plt.legend()plt.xticks(rotation=45)plt.grid(True)plt.tight_layout()plt.show()
2. 柱状图:最近 7 天平均用电量对比
展示最近一周内各居民的平均用电量对比:
def plot_bar_chart(df):recent_df = df[df['date'] >= df['date'].max() - pd.Timedelta(days=7)]avg_consumption = recent_df.groupby('resident')['consumption'].mean().reset_index()plt.figure(figsize=(10, 5))plt.bar(avg_consumption['resident'], avg_consumption['consumption'], color='orange')plt.title('最近 7 天各居民平均用电量对比')plt.xlabel('居民')plt.ylabel('平均用电量 (千瓦时)')plt.xticks(rotation=0)plt.grid(True, axis='y')plt.tight_layout()plt.show()
3. 饼图:总用电量分布
展示各居民在统计周期内的总用电量占比:
def plot_pie_chart(df):total_consumption = df.groupby('resident')['consumption'].sum()plt.figure(figsize=(8, 8))plt.pie(total_consumption, labels=total_consumption.index, autopct='%1.1f%%', startangle=90, shadow=True, explode=[0.05] * len(total_consumption))plt.title('各居民总用电量占比')plt.axis('equal')plt.tight_layout()plt.show()
4. 热力图:每周用电量分布
通过热力图观察不同星期几和不同周的用电量分布模式:
def plot_heatmap(df):df['weekday'] = df['date'].dt.day_name()df['week_number'] = df['date'].dt.isocalendar().weekweekly_data = df.groupby(['week_number', 'weekday'])['consumption'].mean().unstack()weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']weekly_data = weekly_data.reindex(columns=weekday_order)plt.figure(figsize=(12, 8))plt.imshow(weekly_data, cmap='YlOrRd')plt.colorbar(label='平均用电量 (千瓦时)')plt.title('每周用电量热力图')plt.xticks(np.arange(len(weekday_order)), weekday_order, rotation=45)plt.yticks(np.arange(len(weekly_data.index)), weekly_data.index)plt.xlabel('星期')plt.ylabel('周数')for i in range(len(weekly_data.index)):for j in range(len(weekday_order)):if not pd.isna(weekly_data.iloc[i, j]):plt.text(j, i, f"{weekly_data.iloc[i, j]:.1f}", ha="center", va="center", color="black" if weekly_data.iloc[i, j] < 30 else "white")plt.tight_layout()plt.show()
5. 箱线图:用电量分布情况
展示各居民用电量的分布特征(中位数、四分位数、异常值等):
def plot_boxplot(df):plt.figure(figsize=(10, 6))plt.boxplot([df[df['resident'] == resident]['consumption'] for resident in df['resident'].unique()], labels=df['resident'].unique())plt.title('各居民用电量分布箱线图')plt.xlabel('居民')plt.ylabel('用电量 (千瓦时)')plt.grid(True, linestyle='--')plt.tight_layout()plt.show()
主程序与执行流程
完整的代码执行流程如下:
if __name__ == "__main__":# 从数据库读取数据fetched_data = fetch_data_from_db()df = convert_to_dataframe(fetched_data)# 绘制各种图表plot_trends(df)plot_bar_chart(df)plot_pie_chart(df)plot_heatmap(df)plot_boxplot(df)
通过多种图表形式直观展示用电模式和趋势。这种分析方法有助于:
- 识别用电高峰和低谷时段
- 发现异常用电行为
- 制定个性化的节能策略
- 优化社区能源分配
共享源代码:
1、数据库数据:
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (1, '2025-02-27', '居民1', 26);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (2, '2025-02-27', '居民2', 35);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (3, '2025-02-27', '居民3', 28);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (4, '2025-02-27', '居民4', 33);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (5, '2025-02-27', '居民5', 25);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (6, '2025-02-28', '居民1', 29);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (7, '2025-02-28', '居民2', 35);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (8, '2025-02-28', '居民3', 33);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (9, '2025-02-28', '居民4', 37);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (10, '2025-02-28', '居民5', 32);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (11, '2025-03-01', '居民1', 23);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (12, '2025-03-01', '居民2', 24);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (13, '2025-03-01', '居民3', 17);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (14, '2025-03-01', '居民4', 20);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (15, '2025-03-01', '居民5', 19);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (16, '2025-03-02', '居民1', 37);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (17, '2025-03-02', '居民2', 32);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (18, '2025-03-02', '居民3', 31);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (19, '2025-03-02', '居民4', 40);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (20, '2025-03-02', '居民5', 33);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (21, '2025-03-03', '居民1', 30);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (22, '2025-03-03', '居民2', 31);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (23, '2025-03-03', '居民3', 29);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (24, '2025-03-03', '居民4', 25);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (25, '2025-03-03', '居民5', 21);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (26, '2025-03-04', '居民1', 20);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (27, '2025-03-04', '居民2', 25);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (28, '2025-03-04', '居民3', 18);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (29, '2025-03-04', '居民4', 20);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (30, '2025-03-04', '居民5', 25);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (31, '2025-03-05', '居民1', 17);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (32, '2025-03-05', '居民2', 20);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (33, '2025-03-05', '居民3', 17);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (34, '2025-03-05', '居民4', 20);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (35, '2025-03-05', '居民5', 16);
INSERT INTO `jm_ysl`.`daily_electricity` (