Using Cython to package a Python program as a C++ dynamic library (Windows + Visual Studio 2017)
As programmers we all know that Python's library ecosystem is often much richer than C++'s, especially for algorithms. But sometimes our project is developed in C++ and we still want to use a Python library. What then? Calling .py scripts directly leaves the project a messy mix of .py and .cpp files. Instead, we can use Cython to wrap the Python program into a C++ dynamic library. I recently needed the jieba word-segmentation tool, so let's use jieba as the running example and explore how to package a Python program as a C++ library.
1. Development environment
- OS: Windows 10
- IDEs: PyCharm, Visual Studio 2017
- Toolchain: Python 3.6.3, MSVC 14.10 C++ compiler
As a prerequisite, the cython and jieba packages must already be installed; if they are not, install them as follows:
pip install cython
pip install jieba
# If downloads are slow, use a Chinese PyPI mirror:
# pip install cython -i https://repo.huaweicloud.com/repository/pypi/simple/
# pip install jieba -i https://repo.huaweicloud.com/repository/pypi/simple/
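Before going further, it is worth confirming that both packages are actually visible to the interpreter you will build with. A minimal check (the two package names are the only assumptions here):

```python
# Check that the required packages are importable without executing them:
# importlib.util.find_spec only searches sys.path, it does not run the package.
import importlib.util

for name in ("Cython", "jieba"):
    spec = importlib.util.find_spec(name)
    print(name, "OK" if spec is not None else "MISSING")
```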
2. Creating the setup script and Cython code
2.1 Creating the setup.py script
Many articles online make this step look complicated; for learning purposes we can start simple. First create a minimal setup.py install script, as follows:
# setup.py
from setuptools import setup
from Cython.Build import cythonize

setup(
    ext_modules=cythonize("text_segment.pyx"),
    zip_safe=False,
)
2.2 Creating the Cython script
Create a file in .pyx format named text_segment.pyx, as follows:
# text_segment.pyx
from datetime import datetime
import jieba
import os
import logging


cdef void init_logging(log_dir):
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    log_filename = os.path.join(log_dir, datetime.now().strftime("%Y_%m_%d") + ".log")
    logging.basicConfig(filename=log_filename, level=logging.DEBUG,
                        format='%(asctime)s [%(filename)s Line %(lineno)d] - %(levelname)s - %(message)s')


cdef bint check_file_exists(str filename):
    return os.path.isfile(filename)


cdef str read_last_line(str filename):
    if not check_file_exists(filename):
        return None
    with open(filename, 'r', encoding='utf-8') as file:
        lines = file.readlines()
        if lines:
            return lines[-1].strip()
    return ""


cdef void write_segmented_words(list segmented_words, str output_filename):
    with open(output_filename, 'w', encoding='utf-8') as file:
        for word in segmented_words:
            if word.strip() != "":
                file.write(word + '\n')


def segment_text_full(sentence: str):
    logging.info(f"原始句子:{sentence} ")
    cdef list full_list = list(jieba.cut(sentence, cut_all=True))
    logging.info(f"全模式: 【{'/'.join(full_list)}】")
    write_segmented_words(full_list, "words_txt/cut_all_segmented_words.txt")
    return full_list


def segment_text_accurate(sentence: str):
    logging.info(f"原始句子:{sentence} ")
    cdef list accurate_list = list(jieba.cut(sentence, cut_all=False))
    logging.info(f"精确模式: 【{'/'.join(accurate_list)}】")
    write_segmented_words(accurate_list, "words_txt/cut_segmented_words.txt")
    return accurate_list


def segment_text_search(sentence: str):
    logging.info(f"原始句子:{sentence} ")
    cdef list search_list = list(jieba.cut_for_search(sentence))
    logging.info(f"搜索引擎模式: 【{'/'.join(search_list)}】")
    write_segmented_words(search_list, "words_txt/cut_search_segmented_words.txt")
    return search_list


def init_data():
    init_logging("log")
    jieba.load_userdict("dict/dict.txt")
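Helpers like write_segmented_words above are plain Python logic, so they can be prototyped and verified in an ordinary Python file before being moved into the .pyx; a quick sketch (the temporary directory is only for the demonstration):

```python
import os
import tempfile

def write_segmented_words(segmented_words, output_filename):
    # Write one non-empty token per line, UTF-8 encoded,
    # mirroring the cdef helper in text_segment.pyx.
    with open(output_filename, "w", encoding="utf-8") as f:
        for word in segmented_words:
            if word.strip() != "":
                f.write(word + "\n")

with tempfile.TemporaryDirectory() as d:
    path = os.path.join(d, "words.txt")
    write_segmented_words(["我", "", "爱", " "], path)  # empty/blank tokens dropped
    with open(path, encoding="utf-8") as f:
        print(f.read().splitlines())  # ['我', '爱']
```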
3. Building the C++ dynamic library
Run the following command in a terminal at the project directory:
E:\jieba_segmentation> python setup.py build
On success this generates a build folder and a text_segment.cp36-win_amd64.pyd file (structurally the same as a .dll). The build directory layout is as follows:
└─build
    ├─lib.win-amd64-3.6
    └─temp.win-amd64-3.6
        └─Release
The Release directory contains the generated text_segment.cp36-win_amd64.lib library file.
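The cp36-win_amd64 part of the filename is the ABI tag for CPython 3.6 on 64-bit Windows. If you script the build on another machine, you can ask that interpreter which suffix it will produce:

```python
# Predict the filename of the compiled extension module for the current
# interpreter: e.g. ".cp36-win_amd64.pyd" on 64-bit Windows CPython 3.6,
# or ".cpython-310-x86_64-linux-gnu.so" on 64-bit Linux CPython 3.10.
import sysconfig

suffix = sysconfig.get_config_var("EXT_SUFFIX")
print("text_segment" + suffix)
```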
4. Calling from a C++ program
If the previous steps went smoothly, you now have the two library files text_segment.cp36-win_amd64.pyd and text_segment.cp36-win_amd64.lib. Next we call the Cython-generated C++ dynamic library from a C++ program.
4.1 Configuring project dependencies
4.1.1 Configuring the Python dependency
Configure the Python headers:
Right-click the project → Properties → VC++ Directories → Include Directories, and add the Python header path (C:\Users\XXX\AppData\Local\Programs\Python\Python36\include).
Configure the Python import library (.lib):
(1) Right-click the project → Properties → VC++ Directories → Library Directories, and add the Python library path (C:\Users\XXX\AppData\Local\Programs\Python\Python36\libs).
(2) Right-click the project → Properties → Linker → Input → Additional Dependencies, and add python36.lib.
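Rather than hunting for those two paths by hand, the interpreter you intend to embed can report them itself (on Windows the python36.lib import library lives in the libs folder under the installation prefix):

```python
# Query the header location and installation prefix of the current
# interpreter: Include Directories gets the "include" path, and on
# Windows the import library lives under <prefix>\libs.
import sys
import sysconfig

print("headers:", sysconfig.get_paths()["include"])
print("prefix :", sys.prefix)
```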
4.1.2 Configuring the Cython-generated library dependency
Because we did not have Cython generate a public .h header for the module, there is no include directory to configure; the C++ program will simply import the module at runtime.
Configure the dependency library (.lib):
(1) Right-click the project → Properties → VC++ Directories → Library Directories, and add the path to the Cython-generated text_segment.cp36-win_amd64.lib (mine lives under $(ProjectDir)\jieba_cpython\lib).
(2) Right-click the project → Properties → Linker → Input → Additional Dependencies, and add text_segment.cp36-win_amd64.lib.
4.2 C++ test function
Add a source file main.cpp to the VS2017 project configured above, with the following contents:
#include <iostream>
#include <string>
#include <vector>
#include <Windows.h>
#include <Python.h>

bool cutPy(const std::string& sentence, std::vector<std::string>& result, int cutType);

int main()
{
    // Call the Python interface (cutType 0 = full mode)
    std::vector<std::string> pythonFullCutResultVector;
    bool ok = cutPy("我爱自然语言处理", pythonFullCutResultVector, 0);
    if (!ok)
    {
        std::cout << "Python segmentation failed" << std::endl;
        return 1;
    }
    std::cout << "Python segmentation: [";
    for (size_t i = 0; i < pythonFullCutResultVector.size(); ++i)
    {
        std::cout << "\"" << pythonFullCutResultVector[i] << "\"";
        if (i + 1 < pythonFullCutResultVector.size())
            std::cout << ", ";
    }
    std::cout << "]" << std::endl;
    return 0;
}

bool cutPy(const std::string& sentence, std::vector<std::string>& result, int cutType)
{
    // Initialize the Python interpreter
    Py_Initialize();

    // Import the Cython-built module
    PyObject *pModule = PyImport_ImportModule("text_segment");
    if (pModule == NULL)
    {
        PyErr_Print();
        std::cerr << "Failed to load the Python segmentation module!" << std::endl;
        Py_Finalize();
        return false;
    }

    // Call init_data() to set up logging and the user dictionary
    PyObject *pInit = PyObject_GetAttrString(pModule, "init_data");
    if (pInit && PyCallable_Check(pInit))
    {
        PyObject *pInitResult = PyObject_CallObject(pInit, NULL);
        Py_XDECREF(pInitResult);
        Py_DECREF(pInit);
    }
    else
    {
        if (PyErr_Occurred())
            PyErr_Print();
        std::cerr << "Cannot find the init_data function" << std::endl;
        Py_XDECREF(pInit);
        Py_DECREF(pModule);
        Py_Finalize();
        return false;
    }

    // Look up the segmentation function selected by cutType
    PyObject *pFunc;
    switch (cutType)
    {
    case 0:
        pFunc = PyObject_GetAttrString(pModule, "segment_text_full");
        break;
    case 1:
        pFunc = PyObject_GetAttrString(pModule, "segment_text_accurate");
        break;
    case 2:
        pFunc = PyObject_GetAttrString(pModule, "segment_text_search");
        break;
    default:
        pFunc = PyObject_GetAttrString(pModule, "segment_text_full");
        break;
    }
    if (pFunc == NULL || !PyCallable_Check(pFunc))
    {
        if (PyErr_Occurred())
            PyErr_Print();
        std::cerr << "Cannot find the segmentation function" << std::endl;
        Py_XDECREF(pFunc);
        Py_DECREF(pModule);
        Py_Finalize();
        return false;
    }

    // Build the Python string argument from the caller's sentence
    PyObject *pInput = PyUnicode_FromString(sentence.c_str());
    if (pInput == NULL)
    {
        PyErr_Print();
        std::cerr << "Failed to create the Python string!" << std::endl;
        Py_DECREF(pFunc);
        Py_DECREF(pModule);
        Py_Finalize();
        return false;
    }

    // Pack the argument into a tuple
    PyObject *pArgs = PyTuple_Pack(1, pInput);
    if (pArgs == NULL)
    {
        PyErr_Print();
        std::cerr << "Failed to pack the arguments!" << std::endl;
        Py_DECREF(pInput);
        Py_DECREF(pFunc);
        Py_DECREF(pModule);
        Py_Finalize();
        return false;
    }

    // Call the Python function
    bool success = false;
    PyObject *pValue = PyObject_CallObject(pFunc, pArgs);
    Py_DECREF(pArgs);   // release the argument tuple
    Py_DECREF(pInput);  // release the input string
    if (pValue != NULL)
    {
        // The segmentation functions return a list of strings
        if (PyList_Check(pValue))
        {
            Py_ssize_t size = PyList_Size(pValue);
            std::cout << "Segmentation result:";
            for (Py_ssize_t i = 0; i < size; ++i)
            {
                PyObject *pItem = PyList_GetItem(pValue, i); // borrowed reference
                const char* c_str = PyUnicode_AsUTF8(pItem);
                if (!c_str)
                {
                    PyErr_Print();
                    std::cerr << "Failed to convert Unicode to UTF-8" << std::endl;
                    continue;
                }
                std::string str(c_str);
                std::cout << " " << i << ":" << str << " ";
                result.push_back(str);
            }
            std::cout << std::endl;
            success = true;
        }
        else
        {
            std::cerr << "The return value is not a list!" << std::endl;
        }
        Py_DECREF(pValue); // release the return value
    }
    else
    {
        PyErr_Print();
        std::cerr << "Failed to call the Python function!" << std::endl;
    }

    // Clean up
    Py_XDECREF(pFunc);
    Py_DECREF(pModule);
    Py_Finalize();
    return success;
}
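One common stumbling block: PyImport_ImportModule("text_segment") resolves the module through the embedded interpreter's sys.path, so the .pyd must sit in one of those directories. Copying it next to the .exe and prepending that directory to sys.path is the usual fix, and the same line can be issued from C++ via PyRun_SimpleString before the import. The Python equivalent of that fix:

```python
# Ensure the directory holding text_segment.*.pyd is searched first.
# "." stands in for the executable's directory here; adjust as needed.
import sys

sys.path.insert(0, ".")
print("." in sys.path)  # -> True
```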
Build the solution and run the program; the terminal shows the following output, completing the round trip through the dynamic library:
Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\THS\AppData\Local\Temp\jieba.cache
Loading model cost 0.506 seconds.
Prefix dict has been built successfully.
Segmentation result: 0:我 1:爱 2:自然 3:自然语言 4:语言 5:处理
Python segmentation: ["我", "爱", "自然", "自然语言", "语言", "处理"]