User:Manetta/prototyping/videogrep-peter-sunde: Difference between revisions

From XPUB & Lens-Based wiki
(Created page with "= Videogrep = the following scripts are from: http://zulko.github.io/blog/2014/06/21/some-more-videogreping-with-python/ == Peter Sunde, constructed sentence: "We lost, th...")
 
(Undo revision 77539 by Manetta (talk))
 
(One intermediate revision by the same user not shown)
(No difference)

Latest revision as of 11:24, 7 April 2015

Videogrep

the following scripts are from: http://zulko.github.io/blog/2014/06/21/some-more-videogreping-with-python/

Peter Sunde, constructed sentence: "We lost, the internet is not for people."

Using the exact search to create a sentence with Peter's words.


import re # module for regular expressions

def convert_time(timestring):
    """Convert an SRT timestamp string into seconds.

    Args:
        timestring: A timestamp such as "00:00:48,329". The first four runs
            of digits are read as hours, minutes, seconds and milliseconds.

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        IndexError: If the string holds fewer than four runs of digits.
    """
    # Build a real list: on Python 3 ``map`` returns an iterator, which
    # cannot be indexed.
    nums = [float(field) for field in re.findall(r'\d+', timestring)]
    return 3600*nums[0] + 60*nums[1] + nums[2] + nums[3]/1000

# Parse the subtitle file into times_texts: a list of ([start, end], text)
# entries, with both times expressed in seconds.
with open("transmediale-opening-peter-sunde.srt") as f:
    lines = f.readlines()

times_texts = []
current_times, current_text = None, ""
for line in lines:
    times = re.findall("[0-9]*:[0-9]*:[0-9]*,[0-9]*", line)
    if times != []:
        # Store a list, not a bare map(): on Python 3 a map object can be
        # unpacked only once, and these pairs are read repeatedly later on.
        current_times = list(map(convert_time, times))
    elif line == '\n':
        # A blank line closes the current subtitle. Blank lines that do not
        # follow a timing line are ignored, so no (None, "") entry is stored.
        if current_times is not None:
            times_texts.append((current_times, current_text))
        current_times, current_text = None, ""
    elif current_times is not None:
        # Text lines of one subtitle are joined with spaces; lines seen
        # before any timing line (the subtitle index) are skipped.
        current_text = current_text + line.replace("\n"," ")

# Keep the last subtitle when the file does not end with a blank line.
if current_times is not None:
    times_texts.append((current_times, current_text))

print (times_texts)

from collections import Counter
# Print the ten most frequent words longer than five characters
# (lower-cased) found in the subtitle texts.
whole_text = " ".join([text for (time, text) in times_texts])
# Raw string: "\w" in a plain literal is an invalid escape sequence, which
# newer Python versions warn about.
all_words = re.findall(r"\w+", whole_text)
counter = Counter([w.lower() for w in all_words if len(w)>5])
print (counter.most_common(10))

# Times of every subtitle whose text contains "should".
# NOTE: `cuts` is reassigned further down before any video is assembled.
cuts = [times for (times,text) in times_texts
        if (re.findall("should",text) != [])]

from moviepy.editor import VideoFileClip, concatenate

# Source clip that the cuts are taken from; assemble_cuts() reads this
# module-level variable.
video = VideoFileClip("transmediale-opening-peter-sunde.mp4")

def find_word(word, padding=.001):
    """Estimate (t_start, t_end) for a pattern in every subtitle it occurs in.

    ``word`` is searched as a regular expression in each text of the
    module-level ``times_texts``; only the first match per subtitle is used.
    The time span is estimated by linear interpolation of the match's
    character offsets over the subtitle's duration.

    Args:
        word: Regular expression to look for.
        padding: Seconds added before and after each estimated span.

    Returns:
        A list of (t_start, t_end) tuples in seconds, one per matching
        subtitle, in subtitle order.
    """
    spans = []
    for ((t1, t2), text) in times_texts:
        m = re.search(word, text)
        # Empty texts are skipped too: a pattern that matches the empty
        # string would otherwise cause a division by zero below.
        if m is None or not text:
            continue
        # Clamp the padded start at 0 so a match at the very beginning of
        # the video never yields a negative start time.
        t_start = max(0.0, t1 + m.start()*(t2-t1)/len(text) - padding)
        t_end = t1 + m.end()*(t2-t1)/len(text) + padding
        spans.append((t_start, t_end))
    return spans

def assemble_cuts(cuts, outputfile):
    """Cut the (start, end) spans out of ``video``, join them, write a file.

    Args:
        cuts: Iterable of (start, end) pairs passed to ``video.subclip``.
        outputfile: Path of the video file to write.
    """
    clips = []
    for start, end in cuts:
        clips.append(video.subclip(start, end))
    joined = concatenate(clips)
    joined.to_videofile(outputfile)

# Fragments to look up, in output order, paired with the index of the match
# to use for each one.
words = ["we lost", "the", "internet", " ", "is not", " ", "for people", " "]
numbers = [0,2,0,2,0,3,0,5] # take clip number 'n'

# Pick the n-th occurrence of every fragment, then render the sentence.
cuts = []
for (word, n) in zip(words, numbers):
    occurrences = find_word(word)
    cuts.append(occurrences[n])
assemble_cuts(cuts, "transmediale_curated-sentence-04.3.mp4")


Peter Sunde, summary

Making a short summary of the keynote, using the automatically generated srt file from YouTube, downloaded with:

youtube-dl --write-auto-sub --srt-lang en [URL]

and adding a little 'letter-code' into it (here: xqz), for the sentences that should be included.

14 
00:00:48,329 --> 00:00:51,709
xqz a critical mass that are upset with the
current state of the internet

import re # module for regular expressions

def convert_time(timestring):
    """Convert an SRT timestamp string into seconds.

    Args:
        timestring: A timestamp such as "00:00:48,329". The first four runs
            of digits are read as hours, minutes, seconds and milliseconds.

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        IndexError: If the string holds fewer than four runs of digits.
    """
    # Build a real list: on Python 3 ``map`` returns an iterator, which
    # cannot be indexed.
    nums = [float(field) for field in re.findall(r'\d+', timestring)]
    return 3600*nums[0] + 60*nums[1] + nums[2] + nums[3]/1000

# Parse the annotated subtitle file into times_texts: a list of
# ([start, end], text) entries, with both times expressed in seconds.
with open("transmediale-annotation.srt") as f:
    lines = f.readlines()

times_texts = []
current_times, current_text = None, ""
for line in lines:
    times = re.findall("[0-9]*:[0-9]*:[0-9]*,[0-9]*", line)
    if times != []:
        # Store a list, not a bare map(): on Python 3 a map object can be
        # unpacked only once.
        current_times = list(map(convert_time, times))
    elif line == '\n':
        # A blank line closes the current subtitle. Blank lines that do not
        # follow a timing line are ignored, so no (None, "") entry is stored.
        if current_times is not None:
            times_texts.append((current_times, current_text))
        current_times, current_text = None, ""
    elif current_times is not None:
        # Text lines of one subtitle are joined with spaces; lines seen
        # before any timing line (the subtitle index) are skipped.
        current_text = current_text + line.replace("\n"," ")

# Keep the last subtitle when the file does not end with a blank line.
if current_times is not None:
    times_texts.append((current_times, current_text))

print (times_texts)

from collections import Counter
# Print the ten most frequent words longer than five characters
# (lower-cased) found in the subtitle texts.
whole_text = " ".join([text for (time, text) in times_texts])
# Raw string: "\w" in a plain literal is an invalid escape sequence, which
# newer Python versions warn about.
all_words = re.findall(r"\w+", whole_text)
counter = Counter([w.lower() for w in all_words if len(w)>5])
print (counter.most_common(10))

# Times of every subtitle whose text carries the "xqz" marker.
cuts = [times for (times,text) in times_texts
        if (re.findall("xqz",text) != [])]

from moviepy.editor import VideoFileClip, concatenate

# Source clip that the cuts are taken from; assemble_cuts() reads this
# module-level variable.
video = VideoFileClip("transmediale-opening-peter-sunde.mp4")

def assemble_cuts(cuts, outputfile):
    """Cut the (start, end) spans out of ``video``, join them, write a file.

    Args:
        cuts: Iterable of (start, end) pairs passed to ``video.subclip``.
        outputfile: Path of the video file to write.
    """
    clips = []
    for start, end in cuts:
        clips.append(video.subclip(start, end))
    joined = concatenate(clips)
    joined.to_videofile(outputfile)

# Render the subtitles selected in `cuts` (those containing "xqz") into one
# video file.
assemble_cuts(cuts, "transmediale-annotation-02.1.mp4")


Peter Sunde, grepping on spaces, to grep the silence

#youtube-dl --write-srt --srt-lang en [URL]
#youtube-dl --write-auto-sub --srt-lang en [URL]

import re # module for regular expressions

def convert_time(timestring):
    """Convert an SRT timestamp string into seconds.

    Args:
        timestring: A timestamp such as "00:00:48,329". The first four runs
            of digits are read as hours, minutes, seconds and milliseconds.

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        IndexError: If the string holds fewer than four runs of digits.
    """
    # Build a real list: on Python 3 ``map`` returns an iterator, which
    # cannot be indexed.
    nums = [float(field) for field in re.findall(r'\d+', timestring)]
    return 3600*nums[0] + 60*nums[1] + nums[2] + nums[3]/1000

# Parse the subtitle file into times_texts: a list of ([start, end], text)
# entries, with both times expressed in seconds.
with open("transmediale-opening-peter-sunde.srt") as f:
    lines = f.readlines()

times_texts = []
current_times, current_text = None, ""
for line in lines:
    times = re.findall("[0-9]*:[0-9]*:[0-9]*,[0-9]*", line)
    if times != []:
        # Store a list, not a bare map(): on Python 3 a map object can be
        # unpacked only once, and these pairs are read repeatedly later on.
        current_times = list(map(convert_time, times))
    elif line == '\n':
        # A blank line closes the current subtitle. Blank lines that do not
        # follow a timing line are ignored, so no (None, "") entry is stored.
        if current_times is not None:
            times_texts.append((current_times, current_text))
        current_times, current_text = None, ""
    elif current_times is not None:
        # Text lines of one subtitle are joined with spaces; lines seen
        # before any timing line (the subtitle index) are skipped.
        current_text = current_text + line.replace("\n"," ")

# Keep the last subtitle when the file does not end with a blank line.
if current_times is not None:
    times_texts.append((current_times, current_text))

print (times_texts)

from collections import Counter
# Print the ten most frequent words longer than five characters
# (lower-cased) found in the subtitle texts.
whole_text = " ".join([text for (time, text) in times_texts])
# Raw string: "\w" in a plain literal is an invalid escape sequence, which
# newer Python versions warn about.
all_words = re.findall(r"\w+", whole_text)
counter = Counter([w.lower() for w in all_words if len(w)>5])
print (counter.most_common(10))

# Times of every subtitle whose text contains "should".
# NOTE: `cuts` is reassigned further down before any video is assembled.
cuts = [times for (times,text) in times_texts
        if (re.findall("should",text) != [])]

from moviepy.editor import VideoFileClip, concatenate

# Source clip that the cuts are taken from; assemble_cuts() reads this
# module-level variable.
video = VideoFileClip("transmediale-opening-peter-sunde.mp4")

def find_word(word, padding=.02):
    """Estimate (t_start, t_end) for a pattern in every subtitle it occurs in.

    ``word`` is searched as a regular expression in each text of the
    module-level ``times_texts``; only the first match per subtitle is used.
    The time span is estimated by linear interpolation of the match's
    character offsets over the subtitle's duration.

    Args:
        word: Regular expression to look for.
        padding: Seconds added before and after each estimated span.

    Returns:
        A list of (t_start, t_end) tuples in seconds, one per matching
        subtitle, in subtitle order.
    """
    spans = []
    for ((t1, t2), text) in times_texts:
        m = re.search(word, text)
        # Empty texts are skipped too: a pattern that matches the empty
        # string would otherwise cause a division by zero below.
        if m is None or not text:
            continue
        # Clamp the padded start at 0 so a match at the very beginning of
        # the video never yields a negative start time.
        t_start = max(0.0, t1 + m.start()*(t2-t1)/len(text) - padding)
        t_end = t1 + m.end()*(t2-t1)/len(text) + padding
        spans.append((t_start, t_end))
    return spans

def assemble_cuts(cuts, outputfile):
    """Cut the (start, end) spans out of ``video``, join them, write a file.

    Args:
        cuts: Iterable of (start, end) pairs passed to ``video.subclip``.
        outputfile: Path of the video file to write.
    """
    clips = []
    for start, end in cuts:
        clips.append(video.subclip(start, end))
    joined = concatenate(clips)
    joined.to_videofile(outputfile)

# Search for a single space six times, each time taking the match from a
# different subtitle (index n of the results).
words = [" ", " ", " ", " ", " ", " "]
numbers = [0,1,2,3,4,5] # take clip number 'n'

# Pick the n-th occurrence for every entry, then render the result.
cuts = []
for (word, n) in zip(words, numbers):
    occurrences = find_word(word)
    cuts.append(occurrences[n])
assemble_cuts(cuts, "transmediale-silence-01.0.mp4")