Twitter by Python
Tweetをhtmlに変換する
Sample program
#!/usr/bin/python
#coding: utf-8
import re
import urllib
import datetime
################################################################
# Matches the three kinds of token that get turned into links.  The URL
# alternative is listed first so that an '@' or '#' occurring inside a URL is
# consumed as part of that URL instead of being linked a second time.
_TWEET_TOKEN = re.compile(
    r'(?P<url>(?:https?|ftp)://[!-~]+)'
    r'|@(?P<account>[0-9A-Za-z_]+)'
    r'|(?P<tag>#[^\s:]+)'
)


def _quote_utf8(text):
    """Return text percent-encoded from its UTF-8 bytes."""
    # quote() lives in different modules on Python 2 and Python 3.
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3
    return quote(text.encode('utf_8'))


def _link_token(match):
    """Return the anchor markup for a single _TWEET_TOKEN match."""
    url = match.group('url')
    if url is not None:
        return '<a href="%s">%s</a>' % (url, url)
    account = match.group('account')
    if account is not None:
        return '<a href="https://twitter.com/%s">@%s</a>' % (account, account)
    tag = match.group('tag')
    # The leading '#' is quoted together with the tag text, so the search
    # query receives it as %23.
    return '<a href="https://twitter.com/search?q=%s">%s</a>' % (
        _quote_utf8(tag), tag)


def tweet2html( title, link, date, utc_offset_hours=9 ):
    """Convert one tweet into an HTML fragment.

    URLs, @accounts and #tags found in the tweet text are wrapped in anchors,
    and a footer with reply / retweet links, the local time and the author is
    appended.

    NOTE(review): the listing this function was taken from had lost its HTML
    markup (every template was a bare '%s' with more arguments than
    placeholders, which raises TypeError).  The anchor markup and the footer
    layout below are a reconstruction -- confirm them against the intended
    page design.

    NOTE(review): the tweet text is inserted as-is, without HTML escaping;
    escape it first if it is not already entity-encoded.

    Args:
        title: Tweet text (a unicode string on Python 2).
        link: Tweet URL; after splitting on '/', field 3 is used as the
            account name and field 5 as the tweet id.
        date: Timestamp such as '2011-11-11T11:11:11Z'; the numeric fields
            may be separated by ' ', '-', ':', 'T' or 'Z'.
        utc_offset_hours: Hours added to the parsed time.  The default of 9
            keeps the original Tokyo-time behaviour.

    Returns:
        The HTML fragment as a text string.

    Raises:
        IndexError: if link has fewer than six '/'-separated fields.
        ValueError: if a date field is not an integer.
        TypeError: if date has fewer than three numeric fields.
    """
    ##### get account & tweet id ####
    fields = link.split('/')
    account = fields[3]
    tweet_id = fields[5]

    ##### replace date&time ####
    # Empty pieces (e.g. the one after a trailing 'Z') are dropped before
    # conversion.
    numbers = [int(part) for part in re.split(r'[ \-:TZ]', date) if part]
    local_time = datetime.datetime(*numbers[:6])
    local_time += datetime.timedelta(hours=utc_offset_hours)

    ##### add links for url, @account and #tag #####
    # One pass over the original text: every token is replaced exactly where
    # it was matched, so a token that is a prefix of another one ('#tag' vs
    # '#tagged') is not corrupted and inserted markup is never re-scanned.
    body = _TWEET_TOKEN.sub(_link_token, title)

    return '''\
%(body)s
<a href="https://twitter.com/intent/tweet?in_reply_to=%(tid)s">Re</a> <a href="https://twitter.com/intent/retweet?tweet_id=%(tid)s">RT</a>
at %(when)s from <a href="https://twitter.com/%(account)s">%(account)s</a>
''' % {
        'body': body,
        'tid': tweet_id,
        'when': local_time,
        'account': account,
    }
##########################################################################################
# Sample input: tweet text, the tweet's URL and its UTC timestamp.
title = u'@howbona @howbonb 置換のサンプルだい! http://t.co/abc #画像 #びっくり'
link = 'http://twitter.com/howbona/statuses/00000'
date = '2011-11-11T11:11:11Z'

if __name__ == '__main__':
    # Run the demo only when executed as a script, not on import.
    import sys

    # Write the UTF-8 bytes directly: Python 3 exposes the byte stream as
    # sys.stdout.buffer, while on Python 2 sys.stdout itself accepts bytes.
    # This replaces the Python-2-only print statement.
    _out = getattr(sys.stdout, 'buffer', sys.stdout)
    _out.write(tweet2html( title, link, date ).encode('utf_8') + b'\n')
Sample program
Description
##### get account (id) & tweet id (tid) ####
data = link.split('/')
id = data[3]
tid = data[5]
The link is split on the delimiter '/'. The account (id) and the tweet id (tid) are then taken from the resulting fields.
##### replace date&time ####
dd = re.split(' |-|:|T|Z',date)
date = datetime.datetime( int(dd[0]), int(dd[1]), int(dd[2]), int(dd[3]), int(dd[4]), int(dd[5]) )
date += datetime.timedelta( 0,0,0,0,0,9)
The timestamp is split into its fields, and a datetime object is constructed from them. Nine hours are then added with timedelta to convert the time to Tokyo time. Normally a time zone would be used for this, but I don't know how to use time zones.
##### regular expression ####
tagLink = re.compile( '((?:https?|ftp):\/\/[!-~]+)' ) # regular expression for link
tagAt = re.compile('@[0-9a-z]+?( |:|$)') # regular expression for @account
tagSharp = re.compile('#.+?( |:|$)') # regular expression for #tag
These are the regular expressions used to find the URLs, @accounts and #tags.
##### add link for url #####
miter = tagLink.finditer(title) # (a) find tags which can use in for
if( miter != None ):
    done = []
    for m in miter:
        if( not (m.group() in done) ): # (b) to avoid double replace
            title = title.replace( m.group(), ( '%s' ) % ( m.group(), m.group() ) )
            done.append(m.group())
(a) The iterator returned by finditer can be used directly in a for loop.
(b) To avoid replacing the same word twice, each processed word is stored in the list done, and every word is checked against that list before it is replaced.