聊聊Spring AI Alibaba的FeiShuDocumentReader
序
本文主要研究一下Spring AI Alibaba的FeiShuDocumentReader
FeiShuDocumentReader
community/document-readers/spring-ai-alibaba-starter-document-reader-larksuite/src/main/java/com/alibaba/cloud/ai/reader/feishu/FeiShuDocumentReader.java
public class FeiShuDocumentReader implements DocumentReader {private static final Logger log = LoggerFactory.getLogger(FeiShuDocumentReader.class);private final FeiShuResource feiShuResource;private final Client client;private String documentId;private String userAccessToken;private String tenantAccessToken;public FeiShuDocumentReader(FeiShuResource feiShuResource) {this.feiShuResource = feiShuResource;this.client = feiShuResource.buildDefaultFeiShuClient();}public FeiShuDocumentReader(FeiShuResource feiShuResource, String documentId, String userAccessToken,String tenantAccessToken) {this(feiShuResource);this.documentId = documentId;this.userAccessToken = userAccessToken;this.tenantAccessToken = tenantAccessToken;}public FeiShuDocumentReader(FeiShuResource feiShuResource, String userAccessToken) {this(feiShuResource);this.userAccessToken = userAccessToken;}public FeiShuDocumentReader(FeiShuResource feiShuResource, String userAccessToken, String documentId) {this(feiShuResource);this.userAccessToken = userAccessToken;this.documentId = documentId;}/*** use tenant_access_token access [tenant identity]* @param documentId documentId* @param userAccessToken userAccessToken* @return String*/public Document getDocumentContentByUser(String documentId, String userAccessToken) throws Exception {RawContentDocumentReq req = RawContentDocumentReq.newBuilder().documentId(documentId).lang(0).build();RawContentDocumentResp resp = client.docx().document().rawContent(req, RequestOptions.newBuilder().userAccessToken(userAccessToken).build());if (!resp.success()) {System.out.printf("code:%s,msg:%s,reqId:%s, resp:%s%n", resp.getCode(), resp.getMsg(), resp.getRequestId(),Jsons.createGSON(true, false).toJson(JsonParser.parseString(new String(resp.getRawResponse().getBody(), StandardCharsets.UTF_8))));throw new Exception(resp.getMsg());}return toDocument(Jsons.DEFAULT.toJson(resp.getData()));}/*** use tenant_access_token [tenant identity]* @param documentId documentId* @param 
tenantAccessToken tenantAccessToken* @return String*/public Document getDocumentContentByTenant(String documentId, String tenantAccessToken) throws Exception {RawContentDocumentReq req = RawContentDocumentReq.newBuilder().documentId(documentId).lang(0).build();RawContentDocumentResp resp = client.docx().document().rawContent(req, RequestOptions.newBuilder().tenantAccessToken(tenantAccessToken).build());if (!resp.success()) {System.out.printf("code:%s,msg:%s,reqId:%s, resp:%s%n", resp.getCode(), resp.getMsg(), resp.getRequestId(),Jsons.createGSON(true, false).toJson(JsonParser.parseString(new String(resp.getRawResponse().getBody(), StandardCharsets.UTF_8))));throw new Exception(resp.getMsg());}return toDocument(Jsons.DEFAULT.toJson(resp.getData()));}/*** get document list* @param userAccessToken userAccessToken* @return String*/public Document getDocumentListByUser(String userAccessToken) throws Exception {ListFileReq req = ListFileReq.newBuilder().orderBy("EditedTime").direction("DESC").build();ListFileResp resp = client.drive().file().list(req, RequestOptions.newBuilder().userAccessToken(userAccessToken).build());if (!resp.success()) {System.out.printf("code:%s,msg:%s,reqId:%s, resp:%s%n", resp.getCode(), resp.getMsg(), resp.getRequestId(),Jsons.createGSON(true, false).toJson(JsonParser.parseString(new String(resp.getRawResponse().getBody(), StandardCharsets.UTF_8))));throw new Exception(resp.getMsg());}return toDocument(Jsons.DEFAULT.toJson(resp.getData()));}private Document toDocument(String docText) {return new Document(docText);}@Overridepublic List<Document> get() {List<Document> documents = new ArrayList<>();if (this.feiShuResource != null) {loadDocuments(documents, this.feiShuResource);}return documents;}private void loadDocuments(List<Document> documents, FeiShuResource feiShuResource) {String appId = feiShuResource.getAppId();String appSecret = feiShuResource.getAppSecret();String source = format("feishu://%s/%s", appId, appSecret);try {documents.add(new 
Document(source));if (this.userAccessToken != null) {documents.add(getDocumentListByUser(userAccessToken));}else {log.info("userAccessToken is null");}if (this.tenantAccessToken != null && this.documentId != null) {documents.add(getDocumentContentByTenant(documentId, tenantAccessToken));}else {log.info("tenantAccessToken or documentId is null");}if (this.userAccessToken != null && this.documentId != null) {documents.add(getDocumentContentByUser(documentId, userAccessToken));}else {log.info("userAccessToken or documentId is null");}}catch (Exception e) {log.warn("Failed to load an object with appId: {}, appSecret: {},{}", appId, appSecret, e.getMessage(), e);}}}
FeiShuDocumentReader构造器依赖FeiShuResource,其get方法通过loadDocuments将feiShuResource解析为documents,它通过`com.lark.oapi.Client`根据userAccessToken或tenantAccessToken去读取文档
FeiShuResource
community/document-readers/spring-ai-alibaba-starter-document-reader-larksuite/src/main/java/com/alibaba/cloud/ai/reader/feishu/FeiShuResource.java
/**
 * {@code Resource} implementation holding the FeiShu application credentials.
 * Only the constants and fields below are visible in this excerpt; the remaining
 * members are elided.
 */
public class FeiShuResource implements Resource {

	// NOTE(review): presumably a metadata key; its usage is not visible in this excerpt.
	public static final String SOURCE = "source";

	// NOTE(review): looks like the configuration property prefix for this plugin — confirm.
	public static final String FEISHU_PROPERTIES_PREFIX = "spring.ai.alibaba.plugin.feishu";

	// FeiShu application id.
	private final String appId;

	// FeiShu application secret.
	private final String appSecret;

	//......
}
FeiShuResource定义了appId、appSecret属性
示例
@EnabledIfEnvironmentVariable(named = "FEISHU_APP_ID", matches = ".+")
@EnabledIfEnvironmentVariable(named = "FEISHU_APP_SECRET", matches = ".+")
public class FeiShuDocumentReaderTest {private static final Logger log = LoggerFactory.getLogger(FeiShuDocumentReaderTest.class);// Get configuration from environment variablesprivate static final String FEISHU_APP_ID = System.getenv("FEISHU_APP_ID");private static final String FEISHU_APP_SECRET = System.getenv("FEISHU_APP_SECRET");// Optional user token and document ID from environment variablesprivate static final String FEISHU_USER_TOKEN = System.getenv("FEISHU_USER_TOKEN");private static final String FEISHU_DOCUMENT_ID = System.getenv("FEISHU_DOCUMENT_ID");private FeiShuDocumentReader feiShuDocumentReader;private FeiShuResource feiShuResource;static {if (FEISHU_APP_ID == null || FEISHU_APP_SECRET == null) {System.out.println("FEISHU_APP_ID or FEISHU_APP_SECRET environment variable is not set. Tests will be skipped.");}}@BeforeEachvoid setup() {// Skip test if environment variables are not setAssumptions.assumeTrue(FEISHU_APP_ID != null && !FEISHU_APP_ID.isEmpty(),"Skipping test because FEISHU_APP_ID is not set");Assumptions.assumeTrue(FEISHU_APP_SECRET != null && !FEISHU_APP_SECRET.isEmpty(),"Skipping test because FEISHU_APP_SECRET is not set");// Create FeiShuResource with environment variablesfeiShuResource = FeiShuResource.builder().appId(FEISHU_APP_ID).appSecret(FEISHU_APP_SECRET).build();}@Testvoid feiShuDocumentTest() {feiShuDocumentReader = new FeiShuDocumentReader(feiShuResource);List<Document> documentList = feiShuDocumentReader.get();log.info("result:{}", documentList);}@Testvoid feiShuDocumentTestByUserToken() {// Skip test if user token is not setAssumptions.assumeTrue(FEISHU_USER_TOKEN != null && !FEISHU_USER_TOKEN.isEmpty(),"Skipping test because FEISHU_USER_TOKEN is not set");feiShuDocumentReader = new FeiShuDocumentReader(feiShuResource, FEISHU_USER_TOKEN);List<Document> documentList = feiShuDocumentReader.get();log.info("result:{}", documentList);}@Testvoid feiShuDocumentTestByUserTokenAndDocumentId() {// Skip test if user token or document ID 
is not setAssumptions.assumeTrue(FEISHU_USER_TOKEN != null && !FEISHU_USER_TOKEN.isEmpty(),"Skipping test because FEISHU_USER_TOKEN is not set");Assumptions.assumeTrue(FEISHU_DOCUMENT_ID != null && !FEISHU_DOCUMENT_ID.isEmpty(),"Skipping test because FEISHU_DOCUMENT_ID is not set");feiShuDocumentReader = new FeiShuDocumentReader(feiShuResource, FEISHU_USER_TOKEN, FEISHU_DOCUMENT_ID);List<Document> documentList = feiShuDocumentReader.get();log.info("result:{}", documentList);}}
小结
spring-ai-alibaba-starter-document-reader-larksuite提供了FeiShuDocumentReader用于根据userAccessToken或tenantAccessToken读取飞书文档列表或者指定documentId的文档内容。
doc
- java2ai