User:Lbattich/Tests with VideoGrep and python code video editing

Test video source:

"The Poetry of Archiving, Kenneth Goldsmith" Interview by Cornelia Sollfrank

Python code to be run on bash (in Terminal). Based on parsing a corresponding subtitle file for the video.

Python code to be run on bash (in Terminal)

To extract a single word:

import re # module for regular expressions

def convert_time(timestring):
    """ Converts a string into seconds """
    nums = map(float, re.findall(r'\d+', timestring))
    return 3600*nums[0] + 60*nums[1] + nums[2] + nums[3]/1000

with open("Goldsmith.srt") as f:
    lines = f.readlines()

times_texts = []
current_times , current_text = None, ""
for line in lines:
    times = re.findall("[0-9]*:[0-9]*:[0-9]*,[0-9]*", line)
    if times != []:
        current_times = map(convert_time, times)
    elif line == '\n':
        times_texts.append((current_times, current_text))
        current_times, current_text = None, ""
    elif current_times is not None:
        current_text = current_text + line.replace("\n"," ")

# print (times_texts)

from collections import Counter
whole_text = " ".join([text for (time, text) in times_texts])
all_words = re.findall("\w+", whole_text)
counter = Counter([w.lower() for w in all_words if len(w)>5])
# print (counter.most_common(10))

# Find single words:

def find_word(word, padding=.09):
    """ Finds all 'exact' (t_start, t_end) for a word """
    matches = [re.search(word, text)
               for (t,text) in times_texts]
    return [(t1 + m.start()*(t2-t1)/len(text) - padding,
             t1 + m.end()*(t2-t1)/len(text) + padding)
             for m,((t1,t2),text) in zip(matches, times_texts)
             if (m is not None)]


# always include:

from moviepy.editor import *

video = VideoFileClip("Goldsmith.mp4")

def assemble_cuts(cuts, outputfile):
    """ Concatenate cuts and generate a video file. """
    final = concatenate([video.subclip(start, end)
                         for (start,end) in cuts])
    final.to_videofile(outputfile)

assemble_cuts( find_word("you know"), "copy.mp4")

To (try to) construct a sentence:

import re # module for regular expressions

def convert_time(timestring):
    """ Converts a string into seconds """
    nums = map(float, re.findall(r'\d+', timestring))
    return 3600*nums[0] + 60*nums[1] + nums[2] + nums[3]/1000

with open("Goldsmith.srt") as f:
    lines = f.readlines()

times_texts = []
current_times , current_text = None, ""
for line in lines:
    times = re.findall("[0-9]*:[0-9]*:[0-9]*,[0-9]*", line)
    if times != []:
        current_times = map(convert_time, times)
    elif line == '\n':
        times_texts.append((current_times, current_text))
        current_times, current_text = None, ""
    elif current_times is not None:
        current_text = current_text + line.replace("\n"," ")

# print (times_texts)

from collections import Counter
whole_text = " ".join([text for (time, text) in times_texts])
all_words = re.findall("\w+", whole_text)
counter = Counter([w.lower() for w in all_words if len(w)>5])
# print (counter.most_common(10))

# Find single words:

def find_word(word, padding=.09):
    """ Finds all 'exact' (t_start, t_end) for a word """
    matches = [re.search(word, text)
               for (t,text) in times_texts]
    return [(t1 + m.start()*(t2-t1)/len(text) - padding,
             t1 + m.end()*(t2-t1)/len(text) + padding)
             for m,((t1,t2),text) in zip(matches, times_texts)
             if (m is not None)]


# always include:

from moviepy.editor import *

video = VideoFileClip("Goldsmith.mp4")

def assemble_cuts(cuts, outputfile):
    """ Concatenate cuts and generate a video file. """
    final = concatenate([video.subclip(start, end)
                         for (start,end) in cuts])
    final.to_videofile(outputfile)


words = ["you know", "copyright", "UbuWeb", "poetry"]
numbers = [4,11,4,3] # take clip number 'n'

cuts = [find_word(word)[n] for (word,n) in zip(words, numbers)]
assemble_cuts(cuts, "fake.mp4")