[Final Cache + Database Scheme] The Pseudo-Slave Technique
Implementing the pseudo-slave technique: binlog-based change listening on the Following table with cache updates
Solution Overview
To run a dedicated consumer service as a pseudo slave of the Following table, subscribing to the binlog and updating the cache whenever the data changes, the following approach can be used:
Main Components
- MySQL binlog listener: an open-source client that subscribes to the MySQL binlog
- Message queue: change events are published to a queue (optional)
- Consumer service: processes the change events and updates the cache
- Cache: Redis or another caching solution
Implementation Steps
1. Configure the MySQL Binlog
First make sure the binlog is enabled and set to ROW format:

-- Check the current binlog configuration
SHOW VARIABLES LIKE 'log_bin';
SHOW VARIABLES LIKE 'binlog_format';

Then set the following in my.cnf / my.ini:

[mysqld]
log-bin=mysql-bin
binlog-format=ROW
server-id=1
2. Listening to the Binlog from Java
The open-source mysql-binlog-connector-java library can be used:

<!-- pom.xml dependency -->
<dependency>
    <groupId>com.github.shyiko</groupId>
    <artifactId>mysql-binlog-connector-java</artifactId>
    <version>0.25.4</version>
</dependency>
3. Consumer Service Implementation

import com.github.shyiko.mysql.binlog.BinaryLogClient;
import com.github.shyiko.mysql.binlog.event.*;

import java.io.IOException;
import java.io.Serializable;
import java.util.Map;

public class FollowingTableBinlogConsumer {

    private final BinaryLogClient client;
    private final CacheService cacheService;
    // Table id of the Following table, learned from TABLE_MAP events so that
    // row events belonging to other tables are ignored.
    private long followingTableId = -1;

    public FollowingTableBinlogConsumer(String hostname, int port, String username,
                                        String password, CacheService cacheService) {
        this.cacheService = cacheService;
        this.client = new BinaryLogClient(hostname, port, username, password);

        client.registerEventListener(event -> {
            EventData data = event.getData();
            if (data instanceof TableMapEventData) {
                // Table map event: remember which table id belongs to the Following table
                TableMapEventData tableMapEvent = (TableMapEventData) data;
                if ("your_database".equals(tableMapEvent.getDatabase())
                        && "Following".equals(tableMapEvent.getTable())) {
                    followingTableId = tableMapEvent.getTableId();
                }
            } else if (data instanceof WriteRowsEventData) {
                // INSERT
                WriteRowsEventData writeEvent = (WriteRowsEventData) data;
                if (writeEvent.getTableId() == followingTableId) {
                    processWriteEvent(writeEvent);
                }
            } else if (data instanceof UpdateRowsEventData) {
                // UPDATE
                UpdateRowsEventData updateEvent = (UpdateRowsEventData) data;
                if (updateEvent.getTableId() == followingTableId) {
                    processUpdateEvent(updateEvent);
                }
            } else if (data instanceof DeleteRowsEventData) {
                // DELETE
                DeleteRowsEventData deleteEvent = (DeleteRowsEventData) data;
                if (deleteEvent.getTableId() == followingTableId) {
                    processDeleteEvent(deleteEvent);
                }
            }
        });
    }

    private void processWriteEvent(WriteRowsEventData data) {
        // A new follow relationship was inserted
        for (Serializable[] row : data.getRows()) {
            Long followerId = (Long) row[0]; // assuming column 0 is follower_id
            Long followeeId = (Long) row[1]; // assuming column 1 is followee_id
            cacheService.addFollowing(followerId, followeeId);
        }
    }

    private void processUpdateEvent(UpdateRowsEventData data) {
        // Handle updates, in case the Following table is ever updated in place
        for (Map.Entry<Serializable[], Serializable[]> row : data.getRows()) {
            Serializable[] before = row.getKey();
            Serializable[] after = row.getValue();
            // apply business-specific handling here
        }
    }

    private void processDeleteEvent(DeleteRowsEventData data) {
        // A follow relationship was removed (unfollow)
        for (Serializable[] row : data.getRows()) {
            Long followerId = (Long) row[0];
            Long followeeId = (Long) row[1];
            cacheService.removeFollowing(followerId, followeeId);
        }
    }

    public void start() {
        try {
            client.connect();
        } catch (IOException e) {
            throw new RuntimeException("Failed to connect to MySQL binlog", e);
        }
    }

    public void stop() {
        try {
            client.disconnect();
        } catch (IOException e) {
            // log and ignore; the process is shutting down anyway
        }
    }
}
4. Cache Service Implementation

import java.util.Set;
import java.util.stream.Collectors;

import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;

public interface CacheService {
    void addFollowing(Long followerId, Long followeeId);
    void removeFollowing(Long followerId, Long followeeId);
    Set<Long> getFollowings(Long followerId);
    Set<Long> getFollowers(Long followeeId);
}

public class RedisCacheService implements CacheService {

    private final JedisPool jedisPool;

    public RedisCacheService(JedisPool jedisPool) {
        this.jedisPool = jedisPool;
    }

    @Override
    public void addFollowing(Long followerId, Long followeeId) {
        try (Jedis jedis = jedisPool.getResource()) {
            // the user's following list
            jedis.sadd("user:" + followerId + ":followings", followeeId.toString());
            // the followed user's follower list
            jedis.sadd("user:" + followeeId + ":followers", followerId.toString());
        }
    }

    @Override
    public void removeFollowing(Long followerId, Long followeeId) {
        try (Jedis jedis = jedisPool.getResource()) {
            // the user's following list
            jedis.srem("user:" + followerId + ":followings", followeeId.toString());
            // the followed user's follower list
            jedis.srem("user:" + followeeId + ":followers", followerId.toString());
        }
    }

    @Override
    public Set<Long> getFollowings(Long followerId) {
        try (Jedis jedis = jedisPool.getResource()) {
            Set<String> followings = jedis.smembers("user:" + followerId + ":followings");
            return followings.stream().map(Long::valueOf).collect(Collectors.toSet());
        }
    }

    @Override
    public Set<Long> getFollowers(Long followeeId) {
        try (Jedis jedis = jedisPool.getResource()) {
            Set<String> followers = jedis.smembers("user:" + followeeId + ":followers");
            return followers.stream().map(Long::valueOf).collect(Collectors.toSet());
        }
    }
}
5. Starting the Service

import redis.clients.jedis.JedisPool;

public class Application {

    public static void main(String[] args) {
        // Configure the Redis connection pool
        JedisPool jedisPool = new JedisPool("localhost", 6379);
        CacheService cacheService = new RedisCacheService(jedisPool);

        // Start the binlog consumer
        FollowingTableBinlogConsumer consumer = new FollowingTableBinlogConsumer(
                "localhost", 3306, "username", "password", cacheService);
        consumer.start();

        // Register a shutdown hook for clean teardown
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            consumer.stop();
            jedisPool.close();
        }));
    }
}
Advanced Optimizations
1. Introduce a Message Queue (e.g. Kafka)

// In the binlog consumer, publish change events to Kafka
import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class KafkaEventPublisher {

    private final Producer<String, String> producer;

    public KafkaEventPublisher(String bootstrapServers) {
        Properties props = new Properties();
        props.put("bootstrap.servers", bootstrapServers);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        this.producer = new KafkaProducer<>(props);
    }

    public void publishFollowingEvent(String eventType, Long followerId, Long followeeId) {
        String key = followerId + ":" + followeeId;
        String value = String.format(
                "{\"eventType\":\"%s\",\"followerId\":%d,\"followeeId\":%d}",
                eventType, followerId, followeeId);
        producer.send(new ProducerRecord<>("following-events", key, value));
    }

    public void close() {
        producer.close();
    }
}

A separate, independent consumer service then reads these events from Kafka and updates the cache.
2. Handling the Initial Data Sync

// On startup, do a full sync of the Following table into the cache before consuming the binlog
// (followingRepository and jedisPool are assumed to be injected into the surrounding class)
public void initialSync() {
    // Load all follow relationships from the database
    List<Following> allFollowings = followingRepository.findAll();

    // Write them to Redis in bulk through a pipeline
    try (Jedis jedis = jedisPool.getResource()) {
        Pipeline pipeline = jedis.pipelined();
        for (Following following : allFollowings) {
            pipeline.sadd("user:" + following.getFollowerId() + ":followings",
                    following.getFolloweeId().toString());
            pipeline.sadd("user:" + following.getFolloweeId() + ":followers",
                    following.getFollowerId().toString());
        }
        pipeline.sync();
    }
}
3. Monitoring and Fault Tolerance
- Record the binlog position so the consumer can resume from the correct point after a restart
- Implement a retry mechanism for failed cache updates (a minimal sketch follows this list)
- Export monitoring metrics for event-processing latency and error rates
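As a minimal sketch of the retry idea, the wrapper below decorates the CacheService defined earlier; RetryingCacheService, its maxAttempts parameter, and the simple backoff are illustrative assumptions rather than part of any library.

import java.util.Set;

// Illustrative retry wrapper around CacheService; tune attempts/backoff (and add metrics) as needed.
public class RetryingCacheService implements CacheService {

    private final CacheService delegate;
    private final int maxAttempts;

    public RetryingCacheService(CacheService delegate, int maxAttempts) {
        this.delegate = delegate;
        this.maxAttempts = maxAttempts;
    }

    @Override
    public void addFollowing(Long followerId, Long followeeId) {
        withRetry(() -> delegate.addFollowing(followerId, followeeId));
    }

    @Override
    public void removeFollowing(Long followerId, Long followeeId) {
        withRetry(() -> delegate.removeFollowing(followerId, followeeId));
    }

    @Override
    public Set<Long> getFollowings(Long followerId) {
        return delegate.getFollowings(followerId);
    }

    @Override
    public Set<Long> getFollowers(Long followeeId) {
        return delegate.getFollowers(followeeId);
    }

    private void withRetry(Runnable action) {
        RuntimeException last = null;
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
            try {
                action.run();
                return;
            } catch (RuntimeException e) {
                last = e; // a real implementation would also bump an error metric here
                try {
                    Thread.sleep(100L * attempt); // linear backoff before the next attempt
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
        // Surfacing the failure lets the binlog consumer decide whether to stop or skip the event
        throw new RuntimeException("Cache update failed after " + maxAttempts + " attempts", last);
    }
}

The binlog consumer can then be constructed with new RetryingCacheService(new RedisCacheService(jedisPool), 3) instead of the bare Redis implementation.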
Summary
This scheme implements the pseudo-slave technique for the Following table: it listens to the MySQL binlog to capture data changes in near real time and applies them to the Redis cache. The architecture has the following advantages:
- Low latency: database changes are propagated almost in real time
- Decoupling: the consumer service runs independently of the main business service
- Extensibility: additional consumers for different business logic can be added easily
- High performance: Redis stores and queries the relationship data efficiently
Depending on the scale of the business, you can use the simple direct cache-update variant or the more elaborate architecture with a message queue in between.
Having worked through the database design and the cache design in detail, we can now summarize and distill the final scheme that combines cache and database.
A typical application of the pseudo-slave scheme is a user-relationship service, specifically its follow and unfollow endpoints (a minimal sketch follows this list):
- The endpoint writes the Following table directly and responds to the user right away; everything that happens afterwards is completely asynchronous from the user's point of view.
- The Follower table, the count service, and the Redis cache each update their own data from the binlog produced by the Following table.
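A minimal sketch of such an endpoint, assuming a hypothetical FollowingRepository DAO with insert/delete methods (the class and method names are illustrative):

// Illustrative follow/unfollow endpoints: only the Following table is written synchronously.
public class FollowController {

    private final FollowingRepository followingRepository; // assumed DAO for the Following table

    public FollowController(FollowingRepository followingRepository) {
        this.followingRepository = followingRepository;
    }

    public void follow(long followerId, long followeeId) {
        // One synchronous INSERT; the user gets a response as soon as it commits.
        followingRepository.insert(followerId, followeeId);
        // No cache, counter or Follower-table writes here: those services update
        // themselves from the binlog event this INSERT produces.
    }

    public void unfollow(long followerId, long followeeId) {
        // One synchronous DELETE; downstream consumers see it as a DeleteRowsEvent.
        followingRepository.delete(followerId, followeeId);
    }
}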
Does Binlog Listening Lose Data When the Service Restarts or Pauses?
Whether a binlog listener loses data during a restart or pause depends on how it is implemented and configured. The key factors and the corresponding solutions are analyzed below.
Key Factors
1. Binlog position tracking
- No position tracking: if the service does not record which binlog position it has processed, it resumes from the latest position after a restart, and every change in between is lost
- Position tracking: recording the binlog position correctly ensures the service resumes exactly where it stopped
2. MySQL binlog retention policy
- The expire_logs_days parameter controls how many days of binlog MySQL keeps
- If the binlog is purged too early while the service is down for a long time, the missed changes can no longer be recovered
3. Transaction integrity
- Events that were only partially processed before the restart can leave the cache inconsistent. Since the cache writes here are idempotent set operations (SADD/SREM), replaying an event is harmless; the real risk is skipping events, not re-applying them.
Solutions
1. Persist the binlog position
Extend the consumer service from before with position tracking:
import com.github.shyiko.mysql.binlog.event.*;

import java.io.*;

public class FollowingTableBinlogConsumer {

    // Added: a store for the current binlog position
    // (fields and row handlers from the earlier version are unchanged and omitted here)
    private final BinlogPositionStore positionStore;

    public FollowingTableBinlogConsumer(String hostname, int port, String username, String password,
                                        CacheService cacheService, BinlogPositionStore positionStore) {
        this.positionStore = positionStore;

        // Resume from the previously saved binlog file and offset, if any
        BinlogPosition position = positionStore.getPosition();
        if (position != null) {
            client.setBinlogFilename(position.getFilename());
            client.setBinlogPosition(position.getPosition());
        }

        client.registerEventListener(event -> {
            // ... handle the event as before ...

            // Then record the position that has been reached
            EventHeaderV4 header = event.getHeader();
            if (header.getEventType() == EventType.ROTATE) {
                RotateEventData rotateEvent = (RotateEventData) event.getData();
                positionStore.savePosition(new BinlogPosition(
                        rotateEvent.getBinlogFilename(), rotateEvent.getBinlogPosition()));
            } else if (header.getEventType() != EventType.FORMAT_DESCRIPTION) {
                positionStore.savePosition(new BinlogPosition(
                        client.getBinlogFilename(), header.getNextPosition()));
            }
        });
    }
}

// Binlog position store interface
public interface BinlogPositionStore {
    void savePosition(BinlogPosition position);
    BinlogPosition getPosition();
}

// A simple file-based implementation
public class FileBinlogPositionStore implements BinlogPositionStore {

    private final File positionFile;

    public FileBinlogPositionStore(String filePath) {
        this.positionFile = new File(filePath);
    }

    @Override
    public void savePosition(BinlogPosition position) {
        try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(positionFile))) {
            out.writeObject(position);
        } catch (IOException e) {
            throw new RuntimeException("Failed to save binlog position", e);
        }
    }

    @Override
    public BinlogPosition getPosition() {
        if (!positionFile.exists()) {
            return null;
        }
        try (ObjectInputStream in = new ObjectInputStream(new FileInputStream(positionFile))) {
            return (BinlogPosition) in.readObject();
        } catch (Exception e) {
            throw new RuntimeException("Failed to read binlog position", e);
        }
    }
}

// Binlog position value object
public class BinlogPosition implements Serializable {
    private final String filename;
    private final long position;
    // constructor, getters...
}
2. MySQL Configuration Tuning
Make sure the MySQL configuration is sensible:

-- Keep the binlog long enough (adjust to business needs)
SET GLOBAL expire_logs_days = 7;

-- Or use the newer variable (MySQL 8.0+)
SET GLOBAL binlog_expire_logs_seconds = 604800; -- 7 days
3. Validate and Repair Data on Startup
Add a validation step to the service startup:

public void start() {
    // Check whether the saved binlog position is still usable
    BinlogPosition position = positionStore.getPosition();
    if (position != null && !isBinlogFileExists(position.getFilename())) {
        // The referenced binlog file has already been purged: fall back to a full sync
        initialSync();
        positionStore.clearPosition(); // clearPosition() is assumed to be added to BinlogPositionStore
    }
    try {
        client.connect();
    } catch (IOException e) {
        throw new RuntimeException("Failed to connect to MySQL binlog", e);
    }
}

private boolean isBinlogFileExists(String filename) {
    // Check against the binlog files the server still keeps (SHOW BINARY LOGS);
    // jdbcTemplate is assumed to be available, as in the checkpoint example below
    return jdbcTemplate.queryForList("SHOW BINARY LOGS").stream()
            .anyMatch(row -> filename.equals(row.get("Log_name")));
}
4. Graceful Shutdown
Make sure the service shuts down cleanly:

public void stop() {
    try {
        // Let the event currently being processed finish, then disconnect
        client.disconnect();
        // Make sure the latest position has been persisted
        // (flush() is assumed to be added to BinlogPositionStore for stores that buffer writes)
        positionStore.flush();
    } catch (IOException e) {
        // log the exception; the process is shutting down anyway
    }
}
Further Safeguards
1. Introduce a State Table for Processing Status
Create a state-tracking table:

CREATE TABLE binlog_consumer_state (
    consumer_id        VARCHAR(100) PRIMARY KEY,
    binlog_filename    VARCHAR(100) NOT NULL,
    binlog_position    BIGINT       NOT NULL,
    last_heartbeat     TIMESTAMP    NOT NULL,
    processed_checksum VARCHAR(100)
);
2. Periodic Checkpoints

// Record the full state every N events or every M seconds
// (consumerId, jdbcTemplate and computeChecksum() are assumed to exist in the surrounding class)
private void checkpoint(Event event) {
    // Checksum of the data processed so far
    String checksum = computeChecksum();

    EventHeaderV4 header = event.getHeader();

    // Upsert the consumer state row
    jdbcTemplate.update(
            "INSERT INTO binlog_consumer_state VALUES (?, ?, ?, NOW(), ?) " +
            "ON DUPLICATE KEY UPDATE binlog_filename=?, binlog_position=?, last_heartbeat=NOW(), processed_checksum=?",
            consumerId, client.getBinlogFilename(), header.getNextPosition(), checksum,
            client.getBinlogFilename(), header.getNextPosition(), checksum);
}
3. Data Repair
When an inconsistency is detected:

public void repairIfNeeded() {
    // Last processed state according to the database
    StateRecord state = getLastStateFromDB();
    // Last processed state according to the cache
    StateRecord cacheState = getLastStateFromCache();

    if (!state.equals(cacheState)) {
        // Run the repair logic (e.g. re-sync the affected data)
        executeRepair(state);
    }
}
Summary
A correctly implemented binlog listener does not lose data across restarts or pauses, provided that you:
- Persist the binlog position (file name + offset)
- Configure a sufficiently long binlog retention period
- Implement graceful shutdown and recovery
- Consider adding checksum and repair logic (for critical business data)
A recommended complete setup combines:
- Hybrid position storage (local file + database); a sketch follows at the end of this section
- Periodic checkpoints
- Data validation on startup
- A sufficient binlog retention period
- Monitoring of binlog consumption lag
With all of this in place, data is not lost even after a restart, a pause, or extended downtime, and processing resumes from the correct position.
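As one reading of "hybrid position storage (local file + database)", the sketch below combines the FileBinlogPositionStore from above with a database-backed store such as one built on the binlog_consumer_state table; CompositeBinlogPositionStore and its fallback rule are illustrative assumptions.

// Illustrative hybrid store: the file copy is written on every event, the database copy is the durable fallback.
public class CompositeBinlogPositionStore implements BinlogPositionStore {

    private final BinlogPositionStore fileStore;     // fast local copy, e.g. FileBinlogPositionStore
    private final BinlogPositionStore databaseStore; // durable copy, e.g. backed by binlog_consumer_state

    public CompositeBinlogPositionStore(BinlogPositionStore fileStore, BinlogPositionStore databaseStore) {
        this.fileStore = fileStore;
        this.databaseStore = databaseStore;
    }

    @Override
    public void savePosition(BinlogPosition position) {
        fileStore.savePosition(position);
        // The database write could be throttled to every N events, as in the checkpoint example above.
        databaseStore.savePosition(position);
    }

    @Override
    public BinlogPosition getPosition() {
        // Prefer the local file; fall back to the database copy if the file is missing or unreadable.
        BinlogPosition local = fileStore.getPosition();
        return local != null ? local : databaseStore.getPosition();
    }
}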