当前位置: 首页 > news >正文

JAVA实现将富文本内容插入已有word文档并下载(docx4j+jsoup)

JAVA实现将富文本内容插入已有word文档并下载(docx4j+jsoup)

需求描述:

最近公司项目需要开发一个功能,需要将前端保存的富文本内容和目录插入到已有的word文档模板里,并提供下载功能。参考了很多方法,也踩了一些坑,最后使用docx4j+jsoup实现了;因为图片在富文本里保存的是相对路径,需要使用jsoup将富文本的标签解析出来并处理,docx4j无法直接将HTML中以路径引用的图片转换到word中,所以需要将图片下载,并转换成base64编码格式。

引用依赖:

此处依赖是针对JDK8的,其实也写了一个JDK11的,提交代码的时候发现编译不通过,才想起公司运行的JDK版本是JDK1.8的。(一定要注意依赖版本)

 <!-- docx4j XHTML importer; its transitive JAXB artifacts are excluded and pinned explicitly below. -->
 <!-- NOTE(review): two docx4j JAXB binding artifacts (Internal and ReferenceImpl) are declared in this
      list; docx4j guidance is to pick a single one. Confirm that having both is intended. -->
 <dependency>
     <groupId>org.docx4j</groupId>
     <artifactId>docx4j-ImportXHTML</artifactId>
     <version>8.3.10</version>
     <exclusions>
         <exclusion>
             <groupId>com.sun.xml.bind</groupId>
             <artifactId>jaxb-impl</artifactId>
         </exclusion>
         <exclusion>
             <groupId>javax.xml.bind</groupId>
             <artifactId>jaxb-api</artifactId>
         </exclusion>
     </exclusions>
 </dependency>
 <dependency>
     <groupId>org.docx4j</groupId>
     <artifactId>docx4j-JAXB-Internal</artifactId>
     <version>8.3.10</version>
     <exclusions>
         <exclusion>
             <groupId>com.sun.xml.bind</groupId>
             <artifactId>jaxb-impl</artifactId>
         </exclusion>
     </exclusions>
 </dependency>
 <!-- Pin the newer JAXB dependencies explicitly -->
 <dependency>
     <groupId>javax.xml.bind</groupId>
     <artifactId>jaxb-api</artifactId>
     <version>2.3.1</version>
 </dependency>
 <dependency>
     <groupId>com.sun.xml.bind</groupId>
     <artifactId>jaxb-impl</artifactId>
     <version>2.3.8</version>
 </dependency>
 <dependency>
     <groupId>javax.activation</groupId>
     <artifactId>activation</artifactId>
     <version>1.1.1</version>
 </dependency>
 <dependency>
     <groupId>org.docx4j</groupId>
     <artifactId>docx4j-JAXB-ReferenceImpl</artifactId>
     <version>8.3.10</version>
 </dependency>
 <!-- Other utilities -->
 <dependency>
     <groupId>org.jsoup</groupId>
     <artifactId>jsoup</artifactId>
     <version>1.14.3</version>
 </dependency>

代码实现

 private static final Map<String, String> IMAGE_CACHE = new ConcurrentHashMap<>();private static final ExecutorService IMAGE_EXECUTOR = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 2);public String exportSowToWord(String fileName, HashMap<String, String> param)throws Exception {// 1. 批量获取数据String versionId = param.getOrDefault("versionId", "test");List<CatalogTressDTO> catalogList = zentaoProSowCatalogMapper.queryTreeMode(versionId);// 批量获取所有内容List<String> catalogIds = catalogList.stream().map(CatalogTressDTO::getId).collect(Collectors.toList());Map<String, ZentaoProSowContent> contentMap = zentaoProSowContentMapper.selectList(new LambdaQueryWrapper<ZentaoProSowContent>().in(ZentaoProSowContent::getCatalogId, catalogIds)).stream().collect(Collectors.toMap(ZentaoProSowContent::getCatalogId, Function.identity()));// 2. 构建完整HTML内容StringBuilder contentHtml = new StringBuilder();for (CatalogTressDTO catalog : catalogList) {// 处理标题if (StringUtils.isNotBlank(catalog.getIndentedTitle())) {contentHtml.append(buildHeadingTag(catalog));}// 处理内容ZentaoProSowContent content = contentMap.get(catalog.getId());if (content != null && StringUtils.isNotBlank(content.getContent())) {contentHtml.append(content.getContent());}}// 3. 统一处理图片和HTMLString fullHtml = "<!DOCTYPE html><html><head><meta charset='UTF-8'></head><body>"+ contentHtml.toString() + "</body></html>";String processedHtml = processHtmlWithImages(fullHtml);// 4. 
生成Word文档ClassPathResource templateResource = new ClassPathResource("templates/sow_V2.0.docx");WordprocessingMLPackage wordPackage = WordprocessingMLPackage.load(templateResource.getInputStream());MainDocumentPart mainDoc = wordPackage.getMainDocumentPart();// 查找插入位置int insertIndex = findInsertPosition(mainDoc);// 添加HTML内容mainDoc.addAltChunk(AltChunkType.Html, processedHtml.getBytes(), mainDoc, insertIndex);mainDoc.convertAltChunks();ByteArrayOutputStream outputStream = new ByteArrayOutputStream();// 生成目录generateTableOfContents(wordPackage, insertIndex);// 保存文档wordPackage.save(outputStream);return buildResponse(fileName, outputStream.toByteArray());}private String buildHeadingTag(CatalogTressDTO catalog) {int level = catalog.getLevel() != null ? Math.min(Integer.parseInt(catalog.getLevel()), 6) : 1;return String.format("<h%d style='mso-style-name:标题%d'>%s</h%d>",level, level, catalog.getIndentedTitle(), level);}private int findInsertPosition(MainDocumentPart mainDoc) {List<Object> content = mainDoc.getContent();for (int i = 0; i < content.size(); i++) {if (content.get(i) instanceof P) {P p = (P) content.get(i);String text= TextUtils.getText(p);if (text != null && text.contains("插入的内容")) {content.remove(i);  // 移除占位符段落return i+1;          // 返回插入位置}}}return content.size();  // 默认插入到文档末尾}private void generateTableOfContents(WordprocessingMLPackage wordPackage, int insertIndex) throws Exception {TocGenerator tocGenerator = new TocGenerator(wordPackage);Toc.setTocHeadingText("目录");tocGenerator.generateToc(insertIndex - 1, "TOC \\o \"1-3\" \\h \\z \\u ", true);}private String processHtmlWithImages(String html) {Document doc = Jsoup.parse(html);Elements imgs = doc.select("img");// 并行处理图片List<CompletableFuture<Void>> futures = imgs.stream().map(img -> CompletableFuture.runAsync(() -> processImageTag(img), IMAGE_EXECUTOR)).collect(Collectors.toList());// 等待所有任务完成CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();return doc.html();}private void 
processImageTag(Element img) {try {String src = img.attr("src");if (StringUtils.isBlank(src)) return;String networkUrl = convertToNetworkUrl(src);String base64 = IMAGE_CACHE.computeIfAbsent(networkUrl, this::fetchImageBase64);// 异步获取图片尺寸CompletableFuture<BufferedImage> imageFuture = CompletableFuture.supplyAsync(() -> {try {return ImageIO.read(new URL(networkUrl));} catch (Exception e) {return null;}}, IMAGE_EXECUTOR);BufferedImage image = imageFuture.get(3, TimeUnit.SECONDS);if (image != null) {int scaledWidth = (int) (image.getWidth() * 0.9);int scaledHeight = (int) (image.getHeight() * 0.9);img.attr("width", String.valueOf(scaledWidth)).attr("height", String.valueOf(scaledHeight));}img.attr("src", base64);} catch (Exception e) {img.attr("src", "#error");}}private String fetchImageBase64(String imageUrl) {try (InputStream in = new URL(imageUrl).openStream()) {byte[] bytes = IOUtils.toByteArray(in);String mimeType = getMimeType(imageUrl);return "data:" + mimeType + ";base64," + Base64.getEncoder().encodeToString(bytes);} catch (Exception e) {return "#error";}}// 以下为原有工具方法保持不变private String convertToNetworkUrl(String relativePath) {//富文本保存的是相对路径return "http://10.80.88.93:8090/" + relativePath.replaceFirst("^(?:\\.\\./)+", "");}private String getMimeType(String url) {if (url.endsWith(".png")) return "image/png";if (url.endsWith(".jpg") || url.endsWith(".jpeg")) return "image/jpeg";if (url.endsWith(".gif")) return "image/gif";return "application/octet-stream";}private String buildResponse(String fileName, byte[] content) throws UnsupportedEncodingException {//直接返回文件
//        String encodeFileName = URLEncoder.encode(fileName, "UTF-8").replace("\\+", "%20");
//        HttpHeaders header = new HttpHeaders();
//        header.add("Content-Type", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
//        header.add("Content-Disposition", "attachment; filename=" + encodeFileName);
//        return new ResponseEntity<>(content, header, HttpStatus.OK);//上传到MINISOMultipartFile multipartFile = convertByteArrayToMultipartFile(content, fileName);Result result = fileFeign.addFileByInfo(multipartFile);String id = ((Map<String, Object>) result.getData()).get("id").toString();return id;}public MultipartFile convertByteArrayToMultipartFile(byte[] fileBytes, String filename) {return new MultipartFile() {@Overridepublic String getName() {return "file"; // 表单字段名}@Overridepublic String getOriginalFilename() {return filename;}@Overridepublic String getContentType() {return "application/octet-stream"; // 默认二进制流,可自定义(如 "image/png"}@Overridepublic boolean isEmpty() {return fileBytes == null || fileBytes.length == 0;}@Overridepublic long getSize() {return fileBytes.length;}@Overridepublic byte[] getBytes() throws IOException {return fileBytes;}@Overridepublic InputStream getInputStream() throws IOException {return new ByteArrayInputStream(fileBytes);}@Overridepublic void transferTo(File dest) throws IOException, IllegalStateException {try (FileOutputStream fos = new FileOutputStream(dest)) {fos.write(fileBytes);}}};}
}

相关文章:

  • rabbitmq-集群部署
  • # 力扣:2、 两数相加:Java四种解法详解
  • spring boot 2升级3 记录
  • 驱动开发硬核特训 │ Day 23(下篇): i.MX8MP LCDIFv3 驱动中的 Regulator 系统全解
  • 2025一些热门的AI大模型课程资料推荐(持续更新中)
  • APIC Bond0/Teaming
  • QgraphicsView异步线程加载地图瓦片
  • 1. Msys2环境安装
  • 1.文档搜索软件Everything 的使用介绍
  • Kubernetes》》k8s》》explain查 yaml 参数
  • 第十二届蓝桥杯 2021 C/C++组 空间
  • windows中无法关闭mysql57服务
  • RSS‘25|CMU提出统一空中操作框架:以末端执行器为中心,无人机实现高精度遥操作
  • 算法设计与分析(期末试卷)
  • 用Python做有趣的AI项目 6:AI音乐生成器(LSTM Melody Generator)
  • 界面控件DevExpress WPF v25.1预览 - AI功能增强(语义搜索)
  • cas面试题
  • zynq 7010 PS 串口打印
  • 【ESP32】st7735s + LVGL移植
  • nginx代理websocket时ws遇到仅支持域名访问的处理
  • 总有黑眼圈是因为“虚”吗?怎么睡才能改善?
  • 十四届全国人大常委会举行第四十三次委员长会议 ,听取有关草案和议案审议情况汇报
  • BNEF:亚洲、中东和非洲是电力基础设施投资的最大机会所在
  • 巴防长称中俄可参与克什米尔恐袭事件国际调查,外交部回应
  • 滨江集团去年营收约691亿元,尚未结算的预收房款1253亿元
  • 热点问答|第三轮间接谈判结束,美伊分歧还有多大?