User:Natasa Siencnik/workshop1: Difference between revisions
No edit summary |
No edit summary |
||
Line 21: | Line 21: | ||
Python : Simple search and output | Python : Simple search and output | ||
<source lang="php"> | <source lang="php"> | ||
#!/usr/bin/python | #!/usr/bin/env python | ||
#-*- coding:utf-8 -*- | |||
import urllib2, json, sys, codecs | import os, urllib2, json, sys, codecs | ||
enc = codecs.getencoder("ascii") | |||
def twittersearch(tag): | def twittersearch(tag): | ||
url = 'http://search.twitter.com/search.json?q=%%23%s' % tag | url = enc(u'http://search.twitter.com/search.json?q=%%23%s' % tag, "xmlcharrefreplace")[0] | ||
print("Query-URL: " + url) | |||
f = urllib2.urlopen(url) | f = urllib2.urlopen(url) | ||
data = json.load(f) | data = json.load(f) | ||
out = [] | out = [] | ||
for entry in data['results']: | for entry in data['results']: | ||
d = {} | d = {} | ||
Line 61: | Line 41: | ||
d[key] = entry[key] | d[key] = entry[key] | ||
out.append(d) | out.append(d) | ||
return out | return out | ||
list_of_languages = [ | list_of_languages = [ | ||
Rewolusie, | u"Rewolusie", | ||
Revolucion, | u"Revolucion", | ||
#u"الثورة", | |||
#u"Հեղափոխություն", | |||
Inqilab, | u"Inqilab", | ||
Iraultza, | u"Iraultza", | ||
#u"–†—ç–≤–∞–ª—é—Ü—ã—ñ", | |||
#u"–†–µ–≤–æ–ª—é—Ü–∏—è", | |||
u"Revolució", | |||
#u"Èù©ÂëΩ", | |||
Revolucija, | u"Revolucija", | ||
Revoluce, | u"Revoluce", | ||
Revolution, | u"Revolution", | ||
Revolutie, | u"Revolutie", | ||
Revolutsioon, | u"Revolutsioon", | ||
Rebolusyon, | u"Rebolusyon", | ||
Vallankumous, | u"Vallankumous", | ||
u"Révolution", | |||
u"Revolución", | |||
#u"რევოლუცია", | |||
#u"Επανάσταση", | |||
Revolisyon, | u"Revolisyon", | ||
#u"מהפכה", | |||
#u"क्रांति", | |||
Forradalom, | u"Forradalom", | ||
Revolusi, | u"Revolusi", | ||
u"Réabhlóid", | |||
Rivoluzione, | u"Rivoluzione", | ||
#u"Èù©ÂëΩ", | |||
#u"רעוואָלוציע", | |||
#u"혁명", | |||
u"Revol≈´cija", | |||
Revoliucija, | u"Revoliucija", | ||
#u"–†–µ–≤–æ–ª—É—Ü–∏—ò–∞", | |||
Rivoluzzjoni, | u"Rivoluzzjoni", | ||
#u"انقلاب", | |||
Rewolucji, | u"Rewolucji", | ||
u"Revolução", | |||
u"Revolu≈£ie", | |||
#u"–†–µ–≤–æ–ª—é—Ü–∏–∏", | |||
#u"–†–µ–≤–æ–ª—É—Ü–∏—ò–∞", | |||
u"Revol√∫cia", | |||
Mapinduzi, | u"Mapinduzi", | ||
#u"การปฏิวัติ", | |||
Devrim, | u"Devrim", | ||
#u"–†–µ–≤–æ–ª—é—Ü—ñ—ó", | |||
u"C√°ch m·∫°ng", | |||
Chwyldro, | u"Chwyldro", | ||
] | ] | ||
out = [] | out = [] | ||
for tag in list_of_languages: | |||
out.extend(twittersearch(tag)) | |||
ids = [] | |||
if entry['id'] in ids : | for entry in out: | ||
if entry['id'] in ids: | |||
else : | print('non-unique id found\n') | ||
else: | |||
ids.append(entry['id']) | ids.append(entry['id']) | ||
print("%(created_at)s - %(id)d : %(text)s\nIn %(iso_language_code)s" % entry) | |||
print("%(created_at)s - %(id)d : %(text)" % entry) | |||
sys.exit() | sys.exit() | ||
</source> | </source> |
Revision as of 21:11, 10 March 2011
Rumor of Revolution
Web crawler searching Twitter for a Revolution
1 - Ideas
2 - Description
Text missing
3 - Technical Flowchart
4 - Script
Python : Simple search and output
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import os, urllib2, json, sys, codecs
enc = codecs.getencoder("ascii")
def twittersearch(tag):
    """Query the Twitter search endpoint for the hashtag ``#<tag>``.

    The tag is encoded as UTF-8 and percent-escaped before it is put into
    the query string, so accented and non-Latin tags produce a valid URL.
    (Encoding to ASCII with ``xmlcharrefreplace`` would insert ``&#NNN;``
    sequences, whose ``&`` and ``#`` characters cut the query short.)

    Parameters:
        tag: hashtag text without the leading ``#``; unicode or byte string.

    Returns:
        A list with one dict per item of the response's ``results`` array,
        each holding the keys ``created_at``, ``id``, ``text`` and
        ``iso_language_code``.

    Raises:
        urllib2.URLError: if the request fails.
        KeyError: if the response or one of its results lacks an expected key.
    """
    # Imported locally so the function does not depend on the file-level
    # import line being extended.
    from contextlib import closing
    from urllib import quote

    if not isinstance(tag, bytes):
        tag = tag.encode("utf-8")
    # "%23" is the escaped "#"; safe="" escapes every reserved character.
    url = 'http://search.twitter.com/search.json?q=%23' + quote(tag, safe='')
    print("Query-URL: " + url)

    # urllib2 responses are not context managers; closing() releases the
    # connection even when JSON decoding fails.
    with closing(urllib2.urlopen(url)) as response:
        data = json.load(response)

    wanted = ('created_at', 'id', 'text', 'iso_language_code')
    return [dict((key, entry[key]) for key in wanted)
            for entry in data['results']]
# Spellings of "revolution" in different languages, used as hashtag search
# terms. Entries in non-Latin scripts are kept disabled, as they were.
# Literals that had been corrupted by a wrong character-set conversion
# (e.g. u"Revol≈´cija") are restored to their intended spelling.
list_of_languages = [
    u"Rewolusie",
    u"Revolucion",
    #u"الثورة",
    #u"Հեղափոխություն",
    u"Inqilab",
    u"Iraultza",
    #u"Рэвалюцыі",
    #u"Революция",
    u"Revolució",
    #u"革命",
    u"Revolucija",
    u"Revoluce",
    u"Revolution",
    u"Revolutie",
    u"Revolutsioon",
    u"Rebolusyon",
    u"Vallankumous",
    u"Révolution",
    u"Revolución",
    #u"რევოლუცია",
    #u"Επανάσταση",
    u"Revolisyon",
    #u"מהפכה",
    #u"क्रांति",
    u"Forradalom",
    u"Revolusi",
    u"Réabhlóid",
    u"Rivoluzione",
    #u"革命",
    #u"רעוואָלוציע",
    #u"혁명",
    u"Revolūcija",
    u"Revoliucija",
    #u"Револуција",
    u"Rivoluzzjoni",
    #u"انقلاب",
    u"Rewolucji",
    u"Revolução",
    u"Revoluţie",
    #u"Революции",
    #u"Револуција",
    u"Revolúcia",
    u"Mapinduzi",
    #u"การปฏิวัติ",
    u"Devrim",
    #u"Революції",
    u"Cách mạng",
    u"Chwyldro",
]
# Gather the search results for every term in list_of_languages.
out = []
for tag in list_of_languages:
    out.extend(twittersearch(tag))

# Report each tweet id once. A set gives constant-time membership tests,
# where a list would be rescanned for every entry.
# NOTE(review): the indentation of this script was reconstructed; the tweet
# is assumed to be printed only for ids not seen before - confirm.
ids = set()
for entry in out:
    if entry['id'] in ids:
        print('non-unique id found\n')
    else:
        ids.add(entry['id'])
        print("%(created_at)s - %(id)d : %(text)s\nIn %(iso_language_code)s" % entry)
sys.exit()
4 - Languages
Afrikaans: Rewolusie Albanian: Revolucion Arabic: الثورة Armenian: Հեղափոխություն Azerbaijani: Inqilab Basque: Iraultza Belarusian: Рэвалюцыі Bulgarian: Революция Catalan: Revolució Chinese: 革命 Croatian: Revolucija Czech: Revoluce Danish: Revolution Dutch: Revolutie English: Revolution Estonian: Revolutsioon Filipino: Rebolusyon Finnish: Vallankumous French: Révolution Galician: Revolución Georgian: რევოლუცია German: Revolution Greek: Επανάσταση Haitian: Revolisyon Hebrew: מהפכה Hindi: क्रांति Hungarian: Forradalom Icelandic: Revolution Indonesian: Revolusi Irish: Réabhlóid Italian: Rivoluzione Japanese: 革命 Yiddish: רעוואָלוציע Korean: 혁명 Latvian: Revolūcija Lithuanian: Revoliucija Macedonian: Револуција Malay: Revolusi Maltese: Rivoluzzjoni Norwegian: Revolution Persian: انقلاب Polish: Rewolucji Portuguese: Revolução Romanian: Revoluţie Russian: Революции Serbian: Револуција Slovak: Revolúcia Slovenian: Revolucija Spanish: Revolución Swahili: Mapinduzi Swedish: Revolution Tagalog: Rebolusyon Thai: การปฏิวัติ Turkish: Devrim Ukrainian: Революції Urdu: انقلاب Vietnamese: Cách mạng Welsh: Chwyldro
5 - Outcome
Mon, 24 Jan 2011 18:44:48 +0000 : Eating Cinnabon & preparing 4 #jan25 demo #Egypt #revolution
Mon, 24 Jan 2011 18:38:08 +0000 : Its time .. the chance that we should take.. #Revolution NOW - DOWN WITH 8 and 14 March !
Mon, 24 Jan 2011 18:34:24 +0000 : if it is time for anything.. its time for the #Revolution of the people, WAKE UP !
6 - Questions
Where can I find and implement a location or time_zone?
How can I use different languages (unicode / utf-8)?
What kind of program do I need to run this on a server?
How can I implement a time stamp and check for duplicates?
And thanks to Stock, Laura, Lieven and Fako for answering my stupid questions. :)