当前位置：首页 > news >正文

maxscript根据音频创建动画表情

news 来源：原创 2025/4/27 14:06:09

方案1：使用 Python + pydub / Audacity 配合音素识别（phoneme recognition）提取语音中的音素（phonemes），并输出为 JSON 供 3ds Max 使用。

方案2: Papagayo输出.pgo 文件，通过 Python 脚本解析，然后转换成 JSON。

下面介绍方案2：
1、先用 Papagayo 软件把音频解析成 .pgo 文件，再用下面的 Python 脚本（pgo2json.py）提取其中的音素：

import json
import re

# A phoneme line is "<frame> <PHONEME>", e.g. "37 E". Only upper-case labels match.
# NOTE(review): Papagayo's lower-case labels ("rest", "etc") are skipped by this
# pattern, exactly as before - confirm that dropping them is intended.
_PHONEME_LINE = re.compile(r'^(\d+)\s+([A-Z]+)$')

# Frame rate used when the caller gives none and the file has no usable header.
_DEFAULT_FPS = 24


def _header_fps(lines):
    """Return the frame rate from a Papagayo header, or None if it is not there.

    A Papagayo project starts with "lipsync version N", then the sound path,
    then the frames-per-second value on the third line.
    """
    if len(lines) < 3 or not lines[0].strip().lower().startswith("lipsync version"):
        return None
    try:
        value = int(lines[2].strip())
    except ValueError:
        return None
    return value if value > 0 else None


def parse_ng_pgo_file(path, fps=None):
    """Extract the phoneme timeline from a Papagayo .pgo file.

    Args:
        path: Path of the .pgo file to read.
        fps: Frames per second used to convert frame numbers to seconds.
            When None, the value stored in the file header is used, falling
            back to 24 if the header is missing or malformed.

    Returns:
        A list of {"time": seconds rounded to 3 decimals, "phoneme": label}
        dicts, in file order.
    """
    # Papagayo writes UTF-8; decoding with the locale default (e.g. GBK) can
    # fail on a non-ASCII sound path or voice text. "utf-8-sig" also drops a BOM.
    with open(path, 'r', encoding='utf-8-sig', errors='replace') as file:
        lines = file.readlines()

    if fps is None:
        fps = _header_fps(lines) or _DEFAULT_FPS

    phonemes = []
    for line in lines:
        match = _PHONEME_LINE.match(line.strip())
        if match:
            seconds = int(match.group(1)) / fps
            phonemes.append({"time": round(seconds, 3), "phoneme": match.group(2)})
    return phonemes


# Usage
pgo_path = "Papagayo.pgo"
phoneme_data = parse_ng_pgo_file(pgo_path)

# Save as JSON. The file is named phonemes.json because that is the name the
# 3ds Max script loads from its own folder.
with open("phonemes.json", "w", encoding="utf-8") as out:
    json.dump(phoneme_data, out, indent=2)

再用上面的 pgo2json.py 脚本把 .pgo 文件转为 phonemes.json，内容形如：

[
  {"time": 0.333, "phoneme": "E"},
  {"time": 0.542, "phoneme": "AI"}
]

2、需要引入 Newtonsoft.Json.dll 作为 JSON 解析器（与脚本放在同一目录下）
3、定义骨骼的 pose 库 phonemesPose.json（下面以 MAXScript 数组的形式给出）

-- 3. Define the phoneme poses (the position offset used for each phoneme).
-- Each entry is #(<phoneme>, #( #(<control name>, <point3>), ... )).
#(
    #("E", #(
        #("jaw", [0.886,-10,15.13]),
        #("lipUpper", [-14.243,3.0,-0.604]),
        #("lipLower", [16.014,-2.0,0.001])
    )),
    #("AI", #(
        #("jaw", [0,0,0]),
        #("lipUpper", [0,0,0]),
        #("lipLower", [0,0,0])
    )),
    #("L", #(
        #("jaw", [0,-6,0]),
        #("lipUpper", [0,1,0]),
        #("lipLower", [0,-1,0])
    )),
    #("FV", #(
        #("jaw", [0,-3,0]),
        #("lipUpper", [0,2,0]),
        #("lipLower", [0,-2,0])
    ))
)

然后用脚本读取 phonemes.json，并结合姿态库执行动画操作（示例脚本中姿态库直接以 phonemePoses 数组写在脚本里，并未从 phonemesPose.json 读取）：

-- Step 1: load Newtonsoft.Json.dll
-- Both the DLL and phonemes.json are looked up in the folder that holds this script.
scriptDir = getFilenamePath (getSourceFileName())
JsonDllPath = scriptDir + @"Newtonsoft.Json.dll"
JsonFilePath = scriptDir + @"phonemes.json"

-- The path alone does nothing: the assembly has to be loaded before
-- dotNetClass can resolve the Newtonsoft.Json.Linq types used by the reader.
if doesFileExist JsonDllPath then (dotNet.loadAssembly JsonDllPath) else (format "Newtonsoft.Json.dll not found: %\n" JsonDllPath)

-- 1. Read the JSON in one go
-- Read a UTF-8 JSON file and parse it with Json.NET.
--   JsonFilePath : full path of the JSON file.
--   Returns a Newtonsoft.Json.Linq.JArray when the text starts with "[",
--   otherwise a JObject. Returns undefined when the file is missing or empty,
--   or when the Json.NET type cannot be resolved.
fn ReadandProcess JsonFilePath =
(
    if JsonFilePath == undefined or not (doesFileExist JsonFilePath) do
    (
        format "JSON file not found: %\n" JsonFilePath
        return undefined
    )

    -- ReadAllText decodes as UTF-8 and drops a leading byte-order mark, which
    -- would otherwise become the first character and break the "[" test below.
    local encoding = dotNetObject "System.Text.UTF8Encoding"
    local jsonText = (dotNetClass "System.IO.File").ReadAllText JsonFilePath encoding

    -- Leading whitespace must not decide between array and object either.
    jsonText = trimLeft jsonText
    if jsonText.count == 0 do
    (
        format "JSON file is empty: %\n" JsonFilePath
        return undefined
    )

    -- Pick the Json.NET type matching the top-level value: array or object.
    local jsonType = if jsonText[1] == "[" then "Newtonsoft.Json.Linq.JArray" else "Newtonsoft.Json.Linq.JObject"
    local jsonClass = dotNetClass jsonType
    if jsonClass == undefined do
    (
        format "% is not available - is Newtonsoft.Json.dll loaded?\n" jsonType
        return undefined
    )

    jsonClass.Parse jsonText
)

resetMaxFile #noPrompt
-- Path of the audio file
audioFilePath = @"E:\捕鱼\--捕鱼3D资源--\25.4月工作\根据音频做表情\2Papagayo提取和输出音素\recently_short.wav"
-- Add the audio file to ProSound and open the curve editor.
prosound.append audioFilePath
trackview.open "Track View - Curve Editor"

dummyCount = 10
dummyPrefix = "Dummy_"

-- Create the named Dummy helpers that act as the facial controls.
dummy name:"CTRL_Jaw" pos:[0, 0, 0]
dummy name:"CTRL_Lip_Upper" pos:[0, 0, 10]
dummy name:"CTRL_Lip_Lower" pos:[0, 0, 20]

-- Refresh the viewports.
redrawViews()

-- 2. Bind the control nodes to globals used by the keyframing function.
global ctrl_jaw = getNodeByName "CTRL_Jaw"
global ctrl_lipUpper = getNodeByName "CTRL_Lip_Upper"
global ctrl_lipLower = getNodeByName "CTRL_Lip_Lower"

-- 3. Define the phoneme poses (the position offset used for each phoneme).
-- Each entry is #(<phoneme>, #( #(<control name>, <point3>), ... )).
global phonemePoses = #(
    #("E", #(
        #("jaw", [0.886,-10,15.13]),
        #("lipUpper", [-14.243,3.0,-0.604]),
        #("lipLower", [16.014,-2.0,0.001])
    )),
    #("AI", #(
        #("jaw", [0,0,0]),
        #("lipUpper", [0,0,0]),
        #("lipLower", [0,0,0])
    )),
    #("L", #(
        #("jaw", [0,-6,0]),
        #("lipUpper", [0,1,0]),
        #("lipLower", [0,-1,0])
    )),
    #("FV", #(
        #("jaw", [0,-3,0]),
        #("lipUpper", [0,2,0]),
        #("lipLower", [0,-2,0])
    ))
)
-- 4. Apply the poses and set keyframes
-- Key the facial controls from a parsed phoneme timeline.
--   phonemeData : Json.NET array whose items carry "time" (seconds) and
--                 "phoneme" (label), read through .Count / .Item[i].
--   Returns false when phonemeData is undefined, true otherwise.
-- Reads the globals phonemePoses, ctrl_jaw, ctrl_lipUpper and ctrl_lipLower.
-- Phonemes with no entry in phonemePoses are skipped.
-- NOTE(review): the pose values are described as offsets where they are
-- defined, but they are assigned here as absolute positions - confirm which
-- is intended.
fn applyPhonemePoses phonemeData =
(
    if phonemeData == undefined do
    (
        format "No valid phoneme data provided.\n"
        return false
    )

    -- Scoped Auto Key context instead of hand-paired "set animate on/off".
    with animate on
    (
        for i = 0 to phonemeData.Count - 1 do
        (
            local element = phonemeData.item[i]
            local timeSec = element.Item["time"].Value as float
            local phoneme = element.Item["phoneme"].Value as string
            -- Seconds to frames, using the scene frame rate.
            local frameNum = timeSec * frameRate

            -- Find the pose registered for this phoneme.
            for p in phonemePoses where p[1] == phoneme do
            (
                at time frameNum
                (
                    for subPose in p[2] do
                    (
                        -- Map the control name stored in the pose to its node.
                        local ctrl = case subPose[1] of
                        (
                            "jaw": ctrl_jaw
                            "lipUpper": ctrl_lipUpper
                            "lipLower": ctrl_lipLower
                            default: undefined
                        )
                        -- Check the node before touching it, and key only the
                        -- control this sub-pose addresses.
                        if isValidNode ctrl do
                        (
                            ctrl.position = subPose[2]
                            addNewKey ctrl frameNum
                        )
                    )
                )
            )
        )
    )
    true
)
-- 5. Main entry point
-- Parse the JSON file at JsonFilePath, then key the controls from the result.
phonemeData = ReadandProcess JsonFilePath
applyPhonemePoses phonemeData