User:Natasa Siencnik/workshop1: Difference between revisions

From XPUB & Lens-Based wiki
No edit summary
No edit summary
Line 21: Line 21:
Python : Simple search and output
Python : Simple search and output
<source lang="php">
<source lang="php">
#!/usr/bin/python
#!/usr/bin/env python
#-*- coding:utf-8 -*-


import urllib2, json, sys, codecs
import os, urllib2, json, sys, codecs
 
url = 'http://search.twitter.com/search.json?q=%23revolution'
f = urllib2.urlopen(url)
data = json.load(f)
 
output = codecs.open("output.txt", "w", encoding='utf-8')


for entry in data['results']:
enc = codecs.getencoder("ascii")
    line = entry['created_at'] + ' : ' + entry['text']
    output.write(line)
    output.write('\n')
</source>
 
Django : Database tutorial
<source lang="php">
http://docs.djangoproject.com/en/1.2/intro/tutorial02/
</source>
 
Python : Search, implement language-list, check for doubles
<source lang="php">
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import os, urllib2, json, sys, codecs


def twittersearch(tag):  
def twittersearch(tag):  
     url = 'http://search.twitter.com/search.json?q=%%23%s' % tag
     url = enc(u'http://search.twitter.com/search.json?q=%%23%s' % tag, "xmlcharrefreplace")[0]
    print("Query-URL: " + url)
     f = urllib2.urlopen(url)
     f = urllib2.urlopen(url)
     data = json.load(f)
     data = json.load(f)
   
 
     out = []
     out = []
 
     for entry in data['results']:
     for entry in data['results']:
         d = {}
         d = {}
Line 61: Line 41:
             d[key] = entry[key]
             d[key] = entry[key]
         out.append(d)
         out.append(d)
       
     return out
     return out
   


list_of_languages = [
list_of_languages = [
Rewolusie,
u"Rewolusie",
Revolucion,
u"Revolucion",
الثورة,
#u"الثورة",
Հեղափոխություն,
#u"Հեղափոխություն",
Inqilab,
u"Inqilab",
Iraultza,
u"Iraultza",
Рэвалюцыі,
#u"–†—ç–≤–∞–ª—é—Ü—ã—ñ",
Революция,
#u"–†–µ–≤–æ–ª—é—Ü–∏—è",
Revolució,
u"Revolució",
革命,
#u"Èù©ÂëΩ",
Revolucija,
u"Revolucija",
Revoluce,
u"Revoluce",
Revolution,
u"Revolution",
Revolutie,
u"Revolutie",
Revolutsioon,
u"Revolutsioon",
Rebolusyon,
u"Rebolusyon",
Vallankumous,
u"Vallankumous",
Révolution,
u"Révolution",
Revolución,
u"Revolución",
რევოლუცია,
#u"რევოლუცია",
Επανάσταση,
#u"Επανάσταση",
Revolisyon,
u"Revolisyon",
מהפכה,
#u"מהפכה",
क्रांति,
#u"क्रांति",
Forradalom,
u"Forradalom",
Revolusi,
u"Revolusi",
Réabhlóid,
u"Réabhlóid",
Rivoluzione,
u"Rivoluzione",
革命,
#u"Èù©ÂëΩ",
רעוואָלוציע,
#u"רעוואָלוציע",
혁명,
#u"혁명",
Revolūcija,
u"Revol≈´cija",
Revoliucija,
u"Revoliucija",
Револуција,
#u"–†–µ–≤–æ–ª—É—Ü–∏—ò–∞",
Rivoluzzjoni,
u"Rivoluzzjoni",
انقلاب,
#u"انقلاب",
Rewolucji,
u"Rewolucji",
Revolução,
u"Revolução",
Revoluţie,
u"Revolu≈£ie",
Революции,
#u"–†–µ–≤–æ–ª—é—Ü–∏–∏",
Револуција,
#u"–†–µ–≤–æ–ª—É—Ü–∏—ò–∞",
Revolúcia,
u"Revol√∫cia",
Mapinduzi,
u"Mapinduzi",
การปฏิวัติ,
#u"การปฏิวัติ",
Devrim,
u"Devrim",
Революції,
#u"–†–µ–≤–æ–ª—é—Ü—ñ—ó",
Cách mạng,
u"C√°ch m·∫°ng",
Chwyldro,
u"Chwyldro",
]
]
 
out = []
out = []
for tag in list_of_languages:
    out.extend(twittersearch(tag))


for tag in list_of_lanuages:
ids = []
    out.extend(twittersearch(tag))


if entry['id'] in ids :
for entry in out:
         output.write ('non-unique id found\n')  
    if entry['id'] in ids:
     else :
         print('non-unique id found\n')
     else:
         ids.append(entry['id'])
         ids.append(entry['id'])


for entry in out:
     print("%(created_at)s - %(id)d : %(text)s\nIn %(iso_language_code)s" % entry)
     print("%(created_at)s - %(id)d : %(text)" % entry)
   
   
sys.exit()
sys.exit()
</source>
</source>

Revision as of 21:11, 10 March 2011


Rumor of Revolution

Web crawler searching Twitter for a Revolution

1 - Ideas
Presentation-Natasa-20110301.jpg

2 - Description
Text missing

3 - Technical Flowchart
TwitterRevolution Flowchart.jpg

4 - Script

Python : Simple search and output

#!/usr/bin/env python
#-*- coding:utf-8 -*-

import os, urllib2, json, sys, codecs

# Encode function of the ASCII codec: enc(text, errors) -> (encoded, length).
# With errors="xmlcharrefreplace", characters outside ASCII come out as
# XML numeric character references such as "&#233;".
enc = codecs.lookup("ascii").encode

def twittersearch(tag):
    """Search Twitter for the hashtag ``#<tag>`` and return trimmed results.

    tag -- hashtag text without the leading '#'. It is formatted as text,
           UTF-8 encoded and percent-escaped before being put in the URL.

    Returns a list with one dict per entry of the response's 'results'
    array; each dict holds only the keys 'created_at', 'id', 'text' and
    'iso_language_code'.

    Raises urllib2.URLError when the request fails, ValueError when the
    response body is not valid JSON, and KeyError when the response or one
    of its entries lacks an expected key.
    """
    import urllib  # local import: quote() is the only thing needed from it

    # A query string must be percent-encoded. XML character references
    # ("&#233;") contain '&' and '#', which split the query / start a URL
    # fragment, so non-ASCII tags would never reach the search intact.
    quoted_tag = urllib.quote((u'%s' % tag).encode('utf-8'), safe='')
    url = 'http://search.twitter.com/search.json?q=%23' + quoted_tag
    print("Query-URL: " + url)

    f = urllib2.urlopen(url)
    try:
        data = json.load(f)
    finally:
        # Release the connection even when the body cannot be parsed.
        f.close()

    wanted = ('created_at', 'id', 'text', 'iso_language_code')
    return [dict((key, entry[key]) for key in wanted)
            for entry in data['results']]

# The word "revolution" in many languages; each active entry is searched
# as a hashtag. Entries in non-Latin scripts are currently disabled
# (commented out); the reason is not recorded here.
list_of_languages = [
    u"Rewolusie",
    u"Revolucion",
    #u"الثورة",            # Arabic
    #u"Հեղափոխություն",    # Armenian
    u"Inqilab",
    u"Iraultza",
    #u"Рэвалюцыі",         # Belarusian
    #u"Революция",         # Bulgarian
    u"Revolució",
    #u"革命",               # Chinese
    u"Revolucija",
    u"Revoluce",
    u"Revolution",
    u"Revolutie",
    u"Revolutsioon",
    u"Rebolusyon",
    u"Vallankumous",
    u"Révolution",
    u"Revolución",
    #u"რევოლუცია",         # Georgian
    #u"Επανάσταση",        # Greek
    u"Revolisyon",
    #u"מהפכה",             # Hebrew
    #u"क्रांति",             # Hindi
    u"Forradalom",
    u"Revolusi",
    u"Réabhlóid",
    u"Rivoluzione",
    #u"革命",               # Japanese
    #u"רעוואָלוציע",        # Yiddish
    #u"혁명",               # Korean
    u"Revolūcija",
    u"Revoliucija",
    #u"Револуција",        # Macedonian
    u"Rivoluzzjoni",
    #u"انقلاب",            # Persian
    u"Rewolucji",
    u"Revolução",
    u"Revoluţie",
    #u"Революции",         # Russian
    #u"Револуција",        # Serbian
    u"Revolúcia",
    u"Mapinduzi",
    #u"การปฏิวัติ",          # Thai
    u"Devrim",
    #u"Революції",         # Ukrainian
    u"Cách mạng",
    u"Chwyldro",
]
 
# Run one search per tag and pool every returned tweet in a single list.
out = []

for tag in list_of_languages:
    out.extend(twittersearch(tag))

# Tweet ids seen so far; a set keeps the duplicate check O(1) per tweet
# instead of rescanning a growing list.
ids = set()

for entry in out:
    if entry['id'] in ids:
        print('non-unique id found\n')
    else:
        ids.add(entry['id'])

    # Every tweet is printed, including those just flagged as duplicates.
    print("%(created_at)s - %(id)d : %(text)s\nIn %(iso_language_code)s" % entry)

sys.exit()


4 - Languages

Afrikaans: Rewolusie Albanian: Revolucion Arabic: الثورة Armenian: Հեղափոխություն Azerbaijani: Inqilab Basque: Iraultza Belarusian: Рэвалюцыі Bulgarian: Революция Catalan: Revolució Chinese: 革命 Croatian: Revolucija Czech: Revoluce Danish: Revolution Dutch: Revolutie English: Revolution Estonian: Revolutsioon Filipino: Rebolusyon Finnish: Vallankumous French: Révolution Galician: Revolución Georgian: რევოლუცია German: Revolution Greek: Επανάσταση Haitian: Revolisyon Hebrew: מהפכה Hindi: क्रांति Hungarian: Forradalom Icelandic: Revolution Indonesian: Revolusi Irish: Réabhlóid Italian: Rivoluzione Japanese: 革命 Yiddish: רעוואָלוציע Korean: 혁명 Latvian: Revolūcija Lithuanian: Revoliucija Macedonian: Револуција Malay: Revolusi Maltese: Rivoluzzjoni Norwegian: Revolution Persian: انقلاب Polish: Rewolucji Portuguese: Revolução Romanian: Revoluţie Russian: Революции Serbian: Револуција Slovak: Revolúcia Slovenian: Revolucija Spanish: Revolución Swahili: Mapinduzi Swedish: Revolution Tagalog: Rebolusyon Thai: การปฏิวัติ Turkish: Devrim Ukrainian: Революції Urdu: انقلاب Vietnamese: Cách mạng Welsh: Chwyldro


5 - Outcome
Mon, 24 Jan 2011 18:44:48 +0000 : Eating Cinnabon & preparing 4 #jan25 demo #Egypt #revolution
Mon, 24 Jan 2011 18:38:08 +0000 : Its time .. the chance that we should take.. #Revolution NOW - DOWN WITH 8 and 14 March !
Mon, 24 Jan 2011 18:34:24 +0000 : if it is time for anything.. its time for the #Revolution of the people, WAKE UP !


6 - Questions
Where can I find and implement a location or time_zone?
How can I use different languages (unicode / utf-8)?
What kind of program do I need to run this on a server?
How can I implement a time stamp and check for doubles?


And thanks to Stock, Laura, Lieven and Fako for answering my stupid questions. :)