User:Lbattich/Tests with VideoGrep and python code video editing
Test video source:
"The Poetry of Archiving, Kenneth Goldsmith" Interview by Cornelia Sollfrank
Python code to be run from bash (in Terminal). It works by parsing the subtitle (.srt) file that corresponds to the video.
Python code to be run on bash (in Terminal)
To extract a single word:
import re # module for regular expressions
def convert_time(timestring):
    """ Converts an SRT timestamp string into seconds.

    The string is expected to contain four groups of digits, in the order
    hours, minutes, seconds, milliseconds (e.g. "00:01:02,500" -> 62.5).
    Raises IndexError if fewer than four digit groups are present.
    """
    # Build a real list: on Python 3 map() returns an iterator, which
    # cannot be indexed.
    nums = [float(digits) for digits in re.findall(r'\d+', timestring)]
    return 3600*nums[0] + 60*nums[1] + nums[2] + nums[3]/1000
# Parse the subtitle file into times_texts, a list of
# ([t_start, t_end], text) entries with the times expressed in seconds.
with open("Goldsmith.srt") as f:
    lines = f.readlines()

times_texts = []
current_times, current_text = None, ""
for line in lines:
    times = re.findall("[0-9]*:[0-9]*:[0-9]*,[0-9]*", line)
    if times:
        # Timing line: store the times as a real list so that they can be
        # unpacked more than once (a Python 3 map object is exhausted
        # after its first use).
        current_times = [convert_time(t) for t in times]
    elif not line.strip():
        # A blank line closes the current entry. Blank lines with no
        # pending entry (e.g. two in a row) are ignored instead of being
        # stored as (None, "").
        if current_times is not None:
            times_texts.append((current_times, current_text))
        current_times, current_text = None, ""
    elif current_times is not None:
        # Text line belonging to the entry whose timing line was just read.
        current_text = current_text + line.replace("\n", " ")

# Keep the last entry when the file does not end with a blank line.
if current_times is not None:
    times_texts.append((current_times, current_text))
    current_times, current_text = None, ""
# print (times_texts)
from collections import Counter
# Word-frequency overview of the whole transcript: count every word longer
# than 5 characters, case-insensitively.
whole_text = " ".join(text for (time, text) in times_texts)
# Raw string: "\w" in a normal string literal is an invalid escape sequence.
all_words = re.findall(r"\w+", whole_text)
counter = Counter(w.lower() for w in all_words if len(w) > 5)
# print (counter.most_common(10))
# Find single words:
def find_word(word, padding=.09):
    """ Finds all 'exact' (t_start, t_end) for a word.

    `word` is used as a regular expression and searched for in the text of
    every entry of the global `times_texts`; only the first match in each
    entry is used. The position of the match inside the text is mapped
    linearly onto the entry's time span, and `padding` seconds are added on
    both sides.

    Returns a list of (t_start, t_end) tuples in seconds, one per entry in
    which the word was found.
    """
    cuts = []
    for times, text in times_texts:
        # Entries without a timing line cannot be placed in the video, and
        # an empty text would make the interpolation divide by zero.
        if times is None or not text:
            continue
        m = re.search(word, text)
        if m is None:
            continue
        t1, t2 = times
        t_start = t1 + m.start()*(t2-t1)/len(text) - padding
        t_end = t1 + m.end()*(t2-t1)/len(text) + padding
        # Never return a negative start: the padding could otherwise push a
        # match at the very beginning of the video before time zero.
        cuts.append((max(0.0, t_start), t_end))
    return cuts
# always include:
# The star import is presumably what provides VideoFileClip and the
# concatenation helper used by assemble_cuts. `video` is the source clip
# from which assemble_cuts takes every subclip.
from moviepy.editor import *
video = VideoFileClip("Goldsmith.mp4")
def assemble_cuts(cuts, outputfile):
    """ Concatenate cuts and generate a video file.

    `cuts` is an iterable of (start, end) pairs in seconds; each pair is
    cut out of the global `video` clip, and the pieces are joined in the
    given order and written to `outputfile`.

    Raises ValueError if `cuts` is empty.
    """
    clips = [video.subclip(start, end) for (start, end) in cuts]
    if not clips:
        # Fail with a clear message instead of an obscure error from deep
        # inside the concatenation code.
        raise ValueError("no cuts to assemble into %r" % (outputfile,))
    # concatenate_videoclips / write_videofile are the current names of the
    # deprecated aliases concatenate / to_videofile.
    final = concatenate_videoclips(clips)
    final.write_videofile(outputfile)
# Build copy.mp4 from one cut per subtitle entry that contains "you know".
assemble_cuts( find_word("you know"), "copy.mp4")
To (try to) construct a sentence:
import re # module for regular expressions
def convert_time(timestring):
    """ Converts an SRT timestamp string into seconds.

    The string is expected to contain four groups of digits, in the order
    hours, minutes, seconds, milliseconds (e.g. "00:01:02,500" -> 62.5).
    Raises IndexError if fewer than four digit groups are present.
    """
    # Build a real list: on Python 3 map() returns an iterator, which
    # cannot be indexed.
    nums = [float(digits) for digits in re.findall(r'\d+', timestring)]
    return 3600*nums[0] + 60*nums[1] + nums[2] + nums[3]/1000
# Parse the subtitle file into times_texts, a list of
# ([t_start, t_end], text) entries with the times expressed in seconds.
with open("Goldsmith.srt") as f:
    lines = f.readlines()

times_texts = []
current_times, current_text = None, ""
for line in lines:
    times = re.findall("[0-9]*:[0-9]*:[0-9]*,[0-9]*", line)
    if times:
        # Timing line: store the times as a real list so that they can be
        # unpacked more than once (a Python 3 map object is exhausted
        # after its first use).
        current_times = [convert_time(t) for t in times]
    elif not line.strip():
        # A blank line closes the current entry. Blank lines with no
        # pending entry (e.g. two in a row) are ignored instead of being
        # stored as (None, "").
        if current_times is not None:
            times_texts.append((current_times, current_text))
        current_times, current_text = None, ""
    elif current_times is not None:
        # Text line belonging to the entry whose timing line was just read.
        current_text = current_text + line.replace("\n", " ")

# Keep the last entry when the file does not end with a blank line.
if current_times is not None:
    times_texts.append((current_times, current_text))
    current_times, current_text = None, ""
# print (times_texts)
from collections import Counter
# Word-frequency overview of the whole transcript: count every word longer
# than 5 characters, case-insensitively.
whole_text = " ".join(text for (time, text) in times_texts)
# Raw string: "\w" in a normal string literal is an invalid escape sequence.
all_words = re.findall(r"\w+", whole_text)
counter = Counter(w.lower() for w in all_words if len(w) > 5)
# print (counter.most_common(10))
# Find single words:
def find_word(word, padding=.09):
    """ Finds all 'exact' (t_start, t_end) for a word.

    `word` is used as a regular expression and searched for in the text of
    every entry of the global `times_texts`; only the first match in each
    entry is used. The position of the match inside the text is mapped
    linearly onto the entry's time span, and `padding` seconds are added on
    both sides.

    Returns a list of (t_start, t_end) tuples in seconds, one per entry in
    which the word was found.
    """
    cuts = []
    for times, text in times_texts:
        # Entries without a timing line cannot be placed in the video, and
        # an empty text would make the interpolation divide by zero.
        if times is None or not text:
            continue
        m = re.search(word, text)
        if m is None:
            continue
        t1, t2 = times
        t_start = t1 + m.start()*(t2-t1)/len(text) - padding
        t_end = t1 + m.end()*(t2-t1)/len(text) + padding
        # Never return a negative start: the padding could otherwise push a
        # match at the very beginning of the video before time zero.
        cuts.append((max(0.0, t_start), t_end))
    return cuts
# always include:
# The star import is presumably what provides VideoFileClip and the
# concatenation helper used by assemble_cuts. `video` is the source clip
# from which assemble_cuts takes every subclip.
from moviepy.editor import *
video = VideoFileClip("Goldsmith.mp4")
def assemble_cuts(cuts, outputfile):
    """ Concatenate cuts and generate a video file.

    `cuts` is an iterable of (start, end) pairs in seconds; each pair is
    cut out of the global `video` clip, and the pieces are joined in the
    given order and written to `outputfile`.

    Raises ValueError if `cuts` is empty.
    """
    clips = [video.subclip(start, end) for (start, end) in cuts]
    if not clips:
        # Fail with a clear message instead of an obscure error from deep
        # inside the concatenation code.
        raise ValueError("no cuts to assemble into %r" % (outputfile,))
    # concatenate_videoclips / write_videofile are the current names of the
    # deprecated aliases concatenate / to_videofile.
    final = concatenate_videoclips(clips)
    final.write_videofile(outputfile)
# Phrases to string together, in the order they should be heard.
words = ["you know", "copyright", "UbuWeb", "poetry"]
# For each phrase, the index of the occurrence (as returned by find_word)
# that should be used.
numbers = [4, 11, 4, 3]
# Pick the requested occurrence of every phrase and render the result.
cuts = [find_word(phrase)[occurrence]
        for phrase, occurrence in zip(words, numbers)]
assemble_cuts(cuts, "fake.mp4")