当前位置: 首页 > news >正文

excel解析图片pdf附件不怕

背景

工作中经常会遇到导入excel时还附带图片、附件的情况。下面是我解析excel的实现,支持图片、pdf、压缩文件。

实现

依次去解析excel,看看有没有附件。返回的格式是Map:key是附件所在的行(行索引,从0开始),value是该行的附件list,附件格式都被解析成pdf格式。

Reader.java


package com.ruoyi.srm.service;

import java.util.List;

import org.apache.poi.ss.usermodel.Workbook;

import com.ruoyi.srm.domain.req.CapacityReceivingReq.FileListBean;

/**
 * Extracts one kind of embedded content (for example pictures or attached
 * files) from a workbook.
 */
public interface Reader {

    /**
     * Reads the embedded content that belongs to the given column.
     *
     * @param workbook  the workbook to scan
     * @param targetCol index of the column whose embedded content should be
     *                  returned; implementations are expected to compare it
     *                  with the first column of each shape's anchor
     * @return the extracted files, one entry per embedded item
     */
    List<FileListBean> read(Workbook workbook, int targetCol);
}

ReaderComposite.java


package com.ruoyi.srm.service.impl;import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;import org.apache.poi.ss.usermodel.Workbook;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;import com.ruoyi.srm.domain.req.CapacityReceivingReq.FileListBean;
import com.ruoyi.srm.service.Reader;@Component
public class ReaderComposite {

    /** All {@link Reader} implementations injected by Spring. */
    @Autowired
    private List<Reader> readerList;

    /**
     * Runs every injected reader against the workbook and groups the combined
     * results by the line each file reports.
     *
     * @param workbook  the workbook to scan
     * @param targetCol column index handed unchanged to every reader
     * @return the extracted files keyed by the string form of
     *         {@code FileListBean.getLine()}
     */
    public Map<String, List<FileListBean>> read(Workbook workbook, int targetCol) {
        return readerList.stream()
                .flatMap(reader -> reader.read(workbook, targetCol).stream())
                .collect(Collectors.groupingBy(file -> String.valueOf(file.getLine())));
    }
}

ImageReader.java


package com.ruoyi.srm.service.impl;import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Base64;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFClientAnchor;
import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.xssf.usermodel.XSSFPicture;
import org.apache.poi.xssf.usermodel.XSSFPictureData;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.springframework.boot.system.ApplicationHome;
import org.springframework.stereotype.Component;import com.ruoyi.srm.domain.req.CapacityReceivingReq.FileListBean;
import com.ruoyi.srm.service.Reader;import cn.hutool.core.io.FileUtil;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;@Slf4j
@Component
public class ImageReader implements Reader {/*** @param workbook* @param targetRow 目标行索引(例如第3行,索引从0开始)* @return*/@Override@SneakyThrowspublic List<FileListBean> read(Workbook workbook, int targetCol) {ApplicationHome home = new ApplicationHome();String rootPath = home.getDir().getAbsolutePath() + File.separator + "extract" + File.separator;List<FileListBean> result = new ArrayList<>();Map<String, AtomicInteger> counter = new HashMap<>();// 指定要读取图片的工作表和单元格位置Sheet sheet = workbook.getSheetAt(0); // 第一个工作表// 遍历所有绘图对象(包含图片)if (sheet instanceof XSSFSheet) {XSSFSheet xssfSheet = (XSSFSheet) sheet;XSSFDrawing drawing = xssfSheet.getDrawingPatriarch();if (drawing != null) {// 遍历所有形状(包括图片)String dir = rootPath + "_" + System.currentTimeMillis();for (XSSFShape shape : drawing.getShapes()) {if (shape instanceof XSSFPicture) {XSSFPicture picture = (XSSFPicture) shape;XSSFClientAnchor anchor = (XSSFClientAnchor) picture.getAnchor();// 检查图片的左上角是否在目标单元格int targetRow = anchor.getRow1();if (anchor.getCol1() == targetCol) {AtomicInteger integer = counter.computeIfAbsent(targetRow + "_" + targetCol, k -> new AtomicInteger());// 提取图片数据XSSFPictureData pictureData = picture.getPictureData();byte[] imageBytes = pictureData.getData();// 保存图片到本地new File(dir).mkdirs();String filePath = dir + File.separator + "image_" + (targetRow + 1) + "_" + targetCol + "_" + integer.incrementAndGet() + "." + pictureData.suggestFileExtension();try (FileOutputStream out = new FileOutputStream(filePath)) {out.write(imageBytes);log.info("第{}行图片已保存到: {}", targetRow + 1, filePath);String encodeToString = Base64.getEncoder().encodeToString(FileUtil.readBytes(filePath));String mimeType = FileUtil.getMimeType(filePath);if ("image/jpeg".equals(mimeType)) {encodeToString = "data:image/png;base64," + encodeToString;}result.add(new FileListBean().setFileName(new File(filePath).getName()).setContent(encodeToString).setLine(targetRow));}}}}}}return result;}
}

AttachmentReader.java


package com.ruoyi.srm.service.impl;import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.poi.ooxml.POIXMLDocumentPart;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFClientAnchor;
import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.xssf.usermodel.XSSFObjectData;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.tika.Tika;
import org.springframework.boot.system.ApplicationHome;
import org.springframework.stereotype.Component;import com.ruoyi.srm.domain.req.CapacityReceivingReq.FileListBean;
import com.ruoyi.srm.service.Reader;import cn.hutool.core.io.FileUtil;
import lombok.Cleanup;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;@Slf4j
@Component
public class AttachmentReader implements Reader {/*** @param workbook* @param targetRow 目标行索引(例如第3行,索引从0开始)* @return*/@Override@SneakyThrowspublic List<FileListBean> read(Workbook workbook, int targetCol) {ApplicationHome home = new ApplicationHome();String rootPath = home.getDir().getAbsolutePath() + File.separator + "extract" + File.separator;List<FileListBean> result = new ArrayList<>();Map<String, AtomicInteger> counter = new HashMap<>();// 1. 获取所有嵌入对象Sheet sheet = workbook.getSheetAt(0); // 第一个工作表// 1. 获取所有嵌入对象XSSFSheet xssfSheet = (XSSFSheet) sheet;List<POIXMLDocumentPart> relationList = xssfSheet.getRelations();// 在遍历嵌入对象时,检查锚点位置for (POIXMLDocumentPart part : relationList) {if (part instanceof XSSFDrawing) {XSSFDrawing drawing = (XSSFDrawing) part;for (XSSFShape shape : drawing.getShapes()) {if (shape instanceof XSSFObjectData) {XSSFObjectData objData = (XSSFObjectData) shape;XSSFClientAnchor anchor = (XSSFClientAnchor) objData.getAnchor();// 检查锚点是否在目标位置(例如第3行第2列,即B3)int targetRow = anchor.getRow1(); // 行索引从0开始if (anchor.getCol1() == targetCol) {AtomicInteger integer = counter.computeIfAbsent(targetRow + "_" + targetCol, k -> new AtomicInteger());// 提取并保存文件byte[] objectData = objData.getObjectData();@CleanupPOIFSFileSystem poifs = new POIFSFileSystem(new ByteArrayInputStream(objectData));String symbol = "\u0001Ole10Native";if (poifs.getRoot().getEntryNames().contains(symbol)) {InputStream contentStream = poifs.createDocumentInputStream(symbol);String dir = rootPath + "_" + System.currentTimeMillis();new File(dir).mkdirs();String name = "";byte[] byteArray = IOUtils.toByteArray(contentStream);Tika tika = new Tika();String detect = tika.detect(byteArray);System.err.println(detect);if ("application/pdf".equals(detect)) {name = dir + File.separator + "pdf_" + (targetRow + 1) + "_" + targetCol + "_" + integer.incrementAndGet() + ".pdf";} else if ("application/octet-stream".equals(detect)) {
//                                    name = dir + ".zip"; 注释
//                                    @Cleanup
//                                    ZipArchiveInputStream seek = new ZipArchiveInputStream(new ByteArrayInputStream(byteArray));
//                                    try {
//                                        seek.getNextEntry();
//                                    } catch (Exception e) {
//                                        log.debug("解析zip失败.尝试解析成图片");
//                                        name = dir + File.separator + "image_" + (targetRow + 1) + "_" + targetCol + "_" + integer.incrementAndGet() + ".jpg";
//                                    }}@CleanupFileOutputStream out = new FileOutputStream(name);out.write(byteArray);log.info("第{}行{}文件保存成功: {}", targetRow + 1, detect, name);if (name.endsWith(".zip")) {@CleanupZipArchiveInputStream zis = new ZipArchiveInputStream(new FileInputStream(name));ZipArchiveEntry entry;while ((entry = zis.getNextEntry()) != null) {if (entry.isDirectory()) {log.warn("是目录");} else {// 如果是文件,则解压文件File file = new File(dir, entry.getName());try (FileOutputStream out2 = new FileOutputStream(file)) {byte[] buffer2 = new byte[1024];int len;while ((len = zis.read(buffer2)) > 0) {out2.write(buffer2, 0, len);}}log.info("第{}提取{}已保存到: {}", targetRow + 1, entry.getName(), file.getAbsolutePath());}}}// 转base64Arrays.stream(FileUtil.ls(dir)).forEach(item -> {// System.err.println(item.getName());extracted(result, targetRow, item);});}}}}}}return result;}private static void extracted(List<FileListBean> result, int targetRow, File item) {String path = item.getPath();String encodeToString = Base64.getEncoder().encodeToString(FileUtil.readBytes(path));String mimeType = FileUtil.getMimeType(path);// System.err.println(mimeType);if ("image/jpeg".equals(mimeType)) {encodeToString = "data:image/png;base64," + encodeToString;} else {// System.err.println(encodeToString);}result.add(new FileListBean().setFileName(item.getName()).setContent(encodeToString).setLine(targetRow));}
}

相关文章:

  • 一.学习python工具准备
  • spring cloud gateway前面是否必须要有个nginx
  • ARINC818协议(三)
  • CUDA Driver 安装与升级(CentOS 7)
  • 前端:uniapp框架中<scroll-view>r如何控制元素进行局部滚动
  • rancher 网红无法上传大视频,小于2m可以正常上传
  • vmware17 虚拟机 ubuntu22.04 桥接模式,虚拟机无法接收组播消息
  • 【AI插件开发】Notepad++ AI插件开发实践:支持配置界面
  • OpenBMC:BmcWeb log输出
  • 消息中间件——RocketMQ(二)
  • 笔记本电脑屏幕闪烁是怎么回事 原因及解决方法
  • shiro使用
  • 汽车行驶工况特征参数:从“速度曲线”到“驾驶DNA”的硬核解码
  • 原型模式详解及c++代码实现(以自动驾驶感知场景为例)
  • 如何使用Python进行自动化的系统管理?
  • 布隆过滤器如何删除数据
  • 【认知觉醒】是什么? 如何做到 ? ( 持续更新ing )
  • FPGA(现场可编程门阵列)笔记
  • DDS Discovery数据
  • STL简介 + string【上】
  • 上海古籍书店重新开卷,在这里淘旧书获新知
  • 两大跨国巨头称霸GLP-1市场,国产减肥药的机会在哪?
  • 美国国务卿:乌克兰问题谈判不能一直停滞不前
  • 市场监管总局:在全国集中开展食用植物油突出问题排查整治
  • 北京:义务教育阶段入学将积极为多孩家庭长幼随学创造条件
  • 一季度江西GDP达7927.1亿元,同比增长5.7%