import re

import re

# NOTE(review): `input` is subscripted here, so it is presumably a mapping
# supplied by the host runtime rather than the builtin function -- confirm.
html = input["html"]

# Collect (url, duration_text) pairs: every https href containing "viewkey=",
# paired with the text of the nearest following duration <span>. No regex
# flags are set, so the '.' gap between the two does not cross newlines.
matches = [
    hit.groups()
    for hit in re.finditer(
        r'href="(https://[^"]*viewkey=[^"]*)".*?span class="duration">([^<]*)</span>',
        html,
    )
]

# Convert a colon-separated duration string into a number of seconds.
def convert_duration_to_seconds(duration):
    """Return the total number of seconds described by *duration*.

    Accepted forms are ``"H:MM:SS"``, ``"MM:SS"`` and ``"SS"``; each field
    is parsed with ``int``.

    Args:
        duration: Colon-separated duration text, e.g. ``"1:02:03"``.

    Returns:
        The duration as an integer count of seconds.

    Raises:
        ValueError: If there are more than three fields or a field is not
            an integer.
    """
    fields = duration.strip().split(':')
    if len(fields) > 3:
        raise ValueError("unsupported duration format: %r" % (duration,))

    # Fold left to right in base 60 so that a missing hours (or minutes)
    # field is simply treated as absent instead of breaking the unpacking.
    total = 0
    for field in fields:
        total = total * 60 + int(field)
    return total

# Turn each (url, duration_text) pair into a record whose duration is
# expressed in seconds.
results = [
    {"url": link, "duration": convert_duration_to_seconds(raw_duration)}
    for link, raw_duration in matches
]

# Expose the records under the 'results' key of the module-level `output`.
output = {'results': results}
 
 
# Back to Top