Twitter by Python


Tweetをhtmlに変換する

Sample program
#!/usr/bin/python
#coding: utf-8

import re
import urllib
import datetime

################################################################
try:  # Python 2
    from urllib import quote as _quote
except ImportError:  # Python 3
    from urllib.parse import quote as _quote

# URLs, @accounts and #tags are matched by ONE pattern so the tweet text is
# scanned exactly once.  Scanning once means markup that has just been
# inserted is never re-scanned, so a URL containing '#' or '@' is not
# mangled and a URL that is a prefix of another URL is not replaced twice.
# The look-behinds keep e-mail addresses and character entities such as
# "&#39;" from being treated as accounts or tags.
_TWEET_TOKEN = re.compile(
    r'(?P<url>(?:https?|ftp)://[!-~]+)'
    r'|(?<![0-9A-Za-z_])@(?P<account>[0-9A-Za-z_]+)'
    r'|(?<![\w&])#(?P<tag>[^\s:#]+)',
    re.UNICODE,
)

# NOTE(review): the markup below is a reconstruction.  The templates in the
# previous version contained no markup and had fewer '%s' placeholders than
# arguments, so every call raised TypeError.  Confirm the reply / retweet /
# search URLs against the page layout that is actually wanted.
_TWEET_TEMPLATE = '''\
<p>%(body)s</p>
<p><a href="https://twitter.com/intent/tweet?in_reply_to=%(tid)s">Re</a> \
<a href="https://twitter.com/intent/retweet?tweet_id=%(tid)s">RT</a>
at %(date)s from <a href="https://twitter.com/%(account)s">%(account)s</a></p>
'''


def _link_token(match):
    """Return the HTML anchor for one URL, @account or #tag match."""
    text = match.group(0)
    if match.group('url'):
        return '<a href="%s">%s</a>' % (text, text)
    if match.group('account'):
        return '<a href="https://twitter.com/%s">%s</a>' % (
            match.group('account'), text)
    # The leading '#' is kept in the query and percent-encoded as %23.
    query = _quote(text.encode('utf_8'))
    return '<a href="https://twitter.com/search?q=%s">%s</a>' % (query, text)


def tweet2html(title, link, date):
    """Convert one tweet into an HTML fragment.

    Args:
        title: Tweet text.  URLs, @accounts and #tags in it become links.
            NOTE(review): the text is inserted without HTML escaping;
            presumably the feed already delivers it entity-escaped --
            confirm against the caller.
        link: Tweet URL; after splitting on '/', element 3 is used as the
            account name and element 5 as the tweet id
            (e.g. 'http://twitter.com/howbona/statuses/00000').
        date: Timestamp such as '2011-11-11T11:11:11Z'; fields may be
            separated by ' ', '-', ':', 'T' or 'Z'.

    Returns:
        The HTML fragment: the linked tweet text, reply / retweet links,
        the timestamp shifted by +9 hours, and a link to the account.

    Raises:
        ValueError: if ``link`` has fewer than six '/'-separated parts or
            ``date`` does not contain six integer fields.
    """
    parts = link.split('/')
    if len(parts) < 6:
        raise ValueError('unexpected tweet link: %r' % (link,))
    account, tid = parts[3], parts[5]

    # Unpacking exactly six fields rejects truncated timestamps instead of
    # silently defaulting the missing parts.
    fields = re.split(r'[-: TZ]', date)
    year, month, day, hour, minute, second = [int(f) for f in fields[:6]]
    # The input is shifted by nine hours (Tokyo time per the accompanying
    # description).
    local = (datetime.datetime(year, month, day, hour, minute, second)
             + datetime.timedelta(hours=9))

    body = _TWEET_TOKEN.sub(_link_token, title)

    return _TWEET_TEMPLATE % {
        'body': body,
        'tid': tid,
        'date': local,
        'account': account,
    }


################################################################
if __name__ == '__main__':
    title = u'@howbona @howbonb 置換のサンプルだい! http://t.co/abc #画像 #びっくり'
    link = 'http://twitter.com/howbona/statuses/00000'
    date = '2011-11-11T11:11:11Z'
    html = tweet2html(title, link, date)
    if str is bytes:  # Python 2: encode explicitly before printing
        html = html.encode('utf_8')
    print(html)
Sample program

Description

##### get account (id) & tweet id (tid) ####
	data = link.split('/')
	id = data[3]
	tid = data[5]
The link is split on the delimiter '/'. The account (id) and the tweet id (tid) are then taken from the resulting parts.

##### replace date&time ####
	dd = re.split(' |-|:|T|Z',date)
	date = datetime.datetime( int(dd[0]), int(dd[1]), int(dd[2]), int(dd[3]), int(dd[4]), int(dd[5]) )
	date += datetime.timedelta( 0,0,0,0,0,9)
The time string is split into its fields, and a datetime object is constructed from them. Nine hours are then added with timedelta to convert the time to Tokyo time. Normally a time zone would be used for this conversion, but I don't know how to use time zones.

##### regular expression  ####
	tagLink = re.compile( '((?:https?|ftp):\/\/[!-~]+)' ) # regular expression for link
	tagAt = re.compile('@[0-9a-z]+?( |:|$)') # regular expression for @account
	tagSharp = re.compile('#.+?( |:|$)') # regular expression for #tag
These are the regular expressions used to find links, @accounts, and #tags.

##### add link for url #####
	miter = tagLink.finditer(title) # (a) find tags which can use in for
	if( miter != None ):
		done = []
		for m in miter:
			if( not (m.group() in done) ): # (b) to avoid double replace
				title = title.replace( m.group(), ( '%s' ) % ( m.group(), m.group() ) )
				done.append(m.group())
The iterator returned by finditer can be used directly in a for loop.
To avoid replacing the same word twice, each processed word is stored in done, and every word is checked against done before it is replaced.