格式工厂合并 mp4 和 srt,并利用 python 按照字幕剪辑视频,将其分割为若干小段

格式工厂合并 mp4 和 srt,并利用 python 按照字幕剪辑视频,将其分割为若干小段

一、视频合并

1.选择转换为mp4,将视频导入格式工厂

2.调整字幕样式

(图 1:格式工厂中调整字幕样式的界面截图)

二、python

1.可能用到的命令:

pip install moviepy

2.main.py

import cut_srt
import cut_video

if __name__ == '__main__':
    import os

    my_video_path = r"D:\Videos\Star Wars\Star Wars 9 The.Rise.of.Skywalker.2019.mp4"
    # Convert every .srt file in the target folder to its formatted CSV text form
    my_srt_path = r"D:\Documents"
    cut_srt.srt_to_format_txt(my_srt_path)

    # cut_video_by_srt opens its second argument as ONE subtitle file, so the
    # folder itself cannot be passed. Prefer the subtitle named after the video;
    # fall back to the only .srt in the folder; otherwise stop instead of guessing.
    srt_files = sorted(name for name in os.listdir(my_srt_path) if name.endswith(".srt"))
    video_stem = os.path.splitext(os.path.basename(my_video_path))[0]
    if video_stem + ".srt" in srt_files:
        my_srt_file = video_stem + ".srt"
    elif len(srt_files) == 1:
        my_srt_file = srt_files[0]
    else:
        raise SystemExit(
            "Cannot pick a subtitle file in " + my_srt_path +
            ": expected '" + video_stem + ".srt' or exactly one .srt file, found " +
            str(len(srt_files))
        )
    cut_video.cut_video_by_srt(my_video_path, os.path.join(my_srt_path, my_srt_file))

3.cut_srt.py

import re
import os


def check_contain_chinese(check_str):
    """Return True if any character of check_str lies in U+4E00..U+9FFF."""
    return any('\u4e00' <= char <= '\u9fff' for char in check_str)


# Take a raw title and return its normalized form
def validate_title(str_):
    """Strip punctuation from str_, lowercase it and normalize its spacing.

    Every character listed in the pattern below is deleted, the result is
    lowercased, leading/trailing whitespace is removed, and each run of
    spaces is collapsed to a single space.
    """
    without_punctuation = re.sub(r"[\/\\\:\*\?\"\<\>\|\.\,\!\'\-\♪\?\!\…\“\”\,]", "", str_)
    # strip() only trims the two ends; inner runs of spaces are handled next
    trimmed = without_punctuation.lower().strip()
    collapsed = re.sub(' +', ' ', trimmed)
    return collapsed


# Read a subtitle file and return its cues as a two-dimensional list
def get_format_sequences(srt_path_):
    """Parse the subtitle file at srt_path_ into a list of 4-item cues.

    The file is read as UTF-8 (a leading BOM is tolerated) and split into
    chunks on blank lines. Empty lines inside a chunk are discarded, and only
    chunks with exactly four remaining lines are kept. Each kept cue is
    [index, time range, subtitle line 1, subtitle line 2].
    """
    with open(srt_path_, 'r', encoding='utf-8-sig') as srt_file:
        raw_text = srt_file.read()

    cues = []
    for chunk in raw_text.split('\n\n'):
        # Drop the empty strings left behind by stray newlines
        fields = [line for line in chunk.split('\n') if line]
        if len(fields) == 4:
            # index, time range, subtitle 1, subtitle 2
            cues.append(fields)
    return cues


# Convert a clock string such as 1:20:12 into seconds (1*3600 + 20*60 + 12)
def str2sec(x):
    """Return the number of whole seconds represented by an "h:m:s" string."""
    hours, minutes, seconds = (int(part) for part in x.split(':'))
    return hours * 3600 + minutes * 60 + seconds


# Given "02:09:53,440 --> 02:09:55,740", return the start and end as seconds
def get_start_end_time(str_):
    """Convert a subtitle time-range line into (start_seconds, end_seconds).

    The part after the comma in each timestamp (the milliseconds) is dropped
    before conversion, so both values are whole seconds.
    """
    endpoints = str_.strip().split("-->")
    begin_raw, finish_raw = endpoints
    # Keep only the "hh:mm:ss" portion in front of the first comma
    begin_clock = begin_raw.partition(",")[0]
    finish_clock = finish_raw.partition(",")[0]
    return str2sec(begin_clock), str2sec(finish_clock)


def srt_to_format_txt(srt_path):
    """Write a CSV summary next to every .srt file found directly in srt_path.

    For each subtitle file, the cues returned by get_format_sequences are
    written one per line as:

        movie name, 5-digit running counter, cue index, English text, Chinese text

    The output file has the subtitle file's name with a ".csv" extension and
    is encoded as UTF-8 with a BOM. The file name and every written line are
    also printed. A subtitle file that yields no cue is reported and skipped.

    Args:
        srt_path: Directory to scan for files whose name ends with ".srt".
    """
    for fileName in os.listdir(srt_path):
        if not fileName.endswith(".srt"):
            continue
        print(fileName)
        file_path = os.path.join(srt_path, fileName)
        sequences = get_format_sequences(file_path)
        if not sequences:
            # Nothing to inspect for language detection and nothing to write
            print("No 4-line subtitle entries found, skipped: " + fileName)
            continue

        # The first cue decides which of the two text lines is Chinese
        if check_contain_chinese(sequences[0][2]):
            en_position = 3
            ch_position = 2
        else:
            en_position = 2
            ch_position = 3

        # Only swap the extension of the file name itself; replacing ".srt"
        # anywhere in the full path could also rewrite a directory name.
        stem_path, _ = os.path.splitext(file_path)
        movie_name = os.path.splitext(fileName)[0]

        # "w" truncates an existing CSV or creates a new one
        with open(stem_path + ".csv", "w", encoding='utf-8-sig') as f:
            for count, cue in enumerate(sequences, start=1):
                en = cue[en_position]
                ch = cue[ch_position]
                count_format = "{:05d}".format(count)
                sentence_id = cue[0]
                # Commas are removed from the English text so they cannot
                # break the comma-separated columns
                en_format = re.sub(' +', ' ', en.replace("- ", " ").replace(",", " ").strip())
                ch_format = validate_title(ch)
                line = ",".join([movie_name, count_format, sentence_id, en_format, ch_format])
                print(line)
                f.write(line + "\n")

4.cut_video.py

import os
import time
from moviepy.video.io.VideoFileClip import VideoFileClip
from cut_srt import get_format_sequences, get_start_end_time


def cut_video_by_start_end(video_path_, save_file_path_, my_start, my_end, save_name):
    """Cut the span [my_start, my_end] out of a video and write it to a new file.

    Args:
        video_path_: Path of the source video.
        save_file_path_: Output folder prefix. It is concatenated directly
            with save_name, so it must already end with a path separator.
        my_start: Start of the span, passed to subclip unchanged.
        my_end: End of the span. A numeric value larger than the source
            duration is clamped to the duration.
        save_name: File name of the clip to write.
    """
    source = VideoFileClip(video_path_)
    try:
        duration = source.duration
        # Callers pad the end of a cue; do not request a span that runs past
        # the end of the source video.
        if isinstance(my_end, (int, float)) and duration is not None:
            my_end = min(my_end, duration)
        piece = source.subclip(my_start, my_end)
        piece.write_videofile(save_file_path_ + save_name, fps=24, logger=None)
    finally:
        # Release the source even when cutting or encoding fails
        source.close()


def cut_video_by_srt(video_path, srt_path):
    """Split a video into one clip per subtitle cue.

    Clips are written as 00001.mp4, 00002.mp4, ... into a folder named after
    the video file (its path without the extension). Each clip spans the cue's
    time range padded by two seconds on both sides. If the folder already
    holds N entries, work resumes at cue N, so the latest clip is produced
    again. A progress line is printed before each clip is cut.

    Args:
        video_path: Path of the video to split.
        srt_path: Path of the subtitle file whose cues define the clips.
    """
    # Clips are stored in save_file_path; only the extension of the file name
    # is dropped, so a ".mp4" elsewhere in the path is left untouched.
    save_file_path = os.path.splitext(video_path)[0] + os.sep
    os.makedirs(save_file_path, exist_ok=True)

    # Resume from the last existing entry (presumably because the most recent
    # clip may be incomplete); with an empty folder start at cue 1.
    count = max(len(os.listdir(save_file_path)), 1)
    start_index = count - 1

    sequences = get_format_sequences(srt_path)
    total = len(sequences)
    srt_label = os.path.splitext(os.path.basename(srt_path))[0]
    my_time = time.time()
    for count, cue in enumerate(sequences[start_index:], start=count):
        file_name = "{:05d}".format(count) + ".mp4"
        print(srt_label +
              "共" + str(total) + "个,当前:" + file_name +
              ", 当前进度:" + "{:<.2f}".format((count / total) * 100) + "%" + "," +
              " 耗时:" + "{:<.2f}".format(time.time() - my_time) + "s")
        start_time, end_time = get_start_end_time(cue[1])
        # A cue in the first two seconds would give a negative start, which
        # moviepy interprets as an offset from the END of the video.
        padded_start = max(0, start_time - 2)
        cut_video_by_start_end(video_path, save_file_path, padded_start, end_time + 2, file_name)

你可能感兴趣的