# Python: 取出字串中的所有词 (extract every dictionary word that occurs in a string)
import os
def fun(data, block, dic, user_dic=None):
    """Collect every substring of *data* that is an entry of *dic*.

    Every start position in *data* is examined, and at each one windows of
    ``block`` characters down to 2 characters are tried, so overlapping and
    nested entries are all reported. One-character entries are never matched.

    Args:
        data: Text to scan.
        block: Longest window length to try, normally the length of the
            longest entry in *dic*. Values below 2 yield no matches.
        dic: Collection of known words; only the ``in`` operator is used.
        user_dic: Optional set that receives the matches in place. A new
            set is created when it is omitted or ``None``.

    Returns:
        The set holding the matches; this is *user_dic* itself when one
        was passed in.
    """
    if user_dic is None:
        user_dic = set()
    n = len(data)
    for start in range(n):
        # Clamp to the text that is left so no window runs past the end.
        longest = min(block, n - start)
        # From the longest window down to length 2.
        for size in range(longest, 1, -1):
            candidate = data[start:start + size]
            if candidate in dic:
                user_dic.add(candidate)
    return user_dic
if __name__ == "__main__":
    # Demo: look up a small vocabulary inside a sample sentence.
    a = '我爱北京天安门,天安门上太阳升'
    ss = {'天安门', '我爱', '天安'}
    # The longest vocabulary entry bounds the window size that fun() tries.
    block = max(map(len, ss))
    # fun() adds every vocabulary word it finds to this set.
    user_dic = set()
    fun(a, block, ss, user_dic)
    print(user_dic)