#!/usr/local/bin/python

import common
import ConfigParser
import HTMLParser
import imp
import re
import string
import sys
import urllib

# URL scheme prefixes associated with this module.
# NOTE(review): not referenced anywhere in this file; presumably read by the
# bot's dispatcher to decide which messages to route here -- confirm.
module_find_triggers = ['http://', 'https://']

# Opener subclass that only overrides the client identification string, so
# requests made through urllib announce the bot rather than urllib's default.
class AppURLopener(urllib.FancyURLopener):
      # urllib openers send `version` as the User-Agent header.
      version = 'Av_IRC_Bot/1.0'

# Install the opener as urllib's module-wide default; urllib.urlopen() uses
# urllib._urlopener when it has been set.
urllib._urlopener = AppURLopener()

def Gather(keyword = None, msg = None, irc = None, channel = None):
  """Announce the title and the shortened/expanded form of a URL in msg.

  Args:
    keyword: not used by this function.
    msg: str, message text that is scanned for a URL.
    irc: passed through unchanged to common.DeliverMessage.
    channel: passed through unchanged to common.DeliverMessage.
  Returns:
    None. The composed reply is handed to common.DeliverMessage; nothing is
    delivered when msg contains no URL.
  """
  # Loading an already-imported module through imp re-executes it, so this
  # refreshes 'common' on every call. imp.find_module returns an open file
  # object that the caller is responsible for closing.
  module_file, module_path, module_description = imp.find_module('common')
  try:
    imp.load_module('common', module_file, module_path, module_description)
  finally:
    if module_file:
      module_file.close()

  url = ExtractURL(msg)
  if not url:
    # Nothing to fetch, shorten or announce without a URL.
    return

  title = GetPageTitle(url)
  # The shortener needs the database; skip it when no cursor is available.
  if common.SetupDB():
    result = MatchURL(url)
  else:
    result = None

  if result and title:
    reply = '%s : %s' % (result, title)
  elif result:
    reply = '%s' % result
  elif title:
    reply = '%s : %s' % (url, title)
  else:
    reply = '%s' % url
  common.DeliverMessage(reply, irc, channel)


def Shorten(url):
  """Return the urlredirect row id for url, inserting a row if needed.

  Args:
    url: str, the long URL to store.
  Returns:
    The id of the existing or newly inserted urlredirect row, or None when
    common.SetupDB() yields no cursor.
  """
  cursor = common.SetupDB()
  if not cursor:
    return None
  # url comes from chat text, so it is passed as a bound parameter instead of
  # being interpolated into the statement.
  # NOTE(review): '%s' is the DB-API 'format' paramstyle used by MySQLdb; the
  # insert_id() call below suggests that driver -- confirm in common.SetupDB.
  cursor.execute('SELECT id FROM urlredirect WHERE url = %s', (url,))
  row = cursor.fetchone()
  if row:
    return row[0]
  cursor.execute('INSERT INTO urlredirect (url) VALUES (%s)', (url,))
  return cursor.connection.insert_id()


def Expand(id):
  """Look up the long URL stored under a urlredirect row id.

  Args:
    id: the row id of the shortened URL (number or numeric string).
  Returns:
    The stored URL, or None when no such row exists or common.SetupDB()
    yields no cursor.
  """
  cursor = common.SetupDB()
  if not cursor:
    return None
  # Bound parameter rather than string interpolation, so the value cannot
  # alter the statement.
  # NOTE(review): '%s' is the DB-API 'format' paramstyle used by MySQLdb --
  # confirm against the driver set up in common.SetupDB.
  cursor.execute('SELECT url FROM urlredirect WHERE id = %s', (id,))
  row = cursor.fetchone()
  if not row:
    return None
  # NOTE(review): .tostring() implies the driver returns this column as an
  # array-like object rather than str -- confirm against the schema.
  return row[0].tostring()


def ExtractURL(msg):
  """Return the last http:// or https:// URL found in msg.

  Args:
    msg: str or None, the text to scan.
  Returns:
    The URL (scheme through the next whitespace) as a str, or None when msg
    is empty/None or contains no URL.
  """
  if not msg:
    return None
  # Matches are non-overlapping, so a URL that itself contains 'http' (for
  # example an archive link wrapping another URL) is kept whole.
  urls = re.findall(r'https?://\S+', msg)
  if not urls:
    return None
  # With several URLs in one message the last one wins.
  return urls[-1]

def MatchURL(url):
  """Expand a shortened URL, or shorten a long one.

  The short-URL prefix is read from the 'url_path' option in the [url]
  section of config/url_shorten.conf.

  Args:
    url: str or None, the URL to inspect.
  Returns:
    A human-readable status message (str), or None when url is empty/None or
    is neither a short URL nor longer than 30 characters.
  """
  if not url:
    return None

  url_config = ConfigParser.ConfigParser()
  config_file = open('config/url_shorten.conf')
  try:
    url_config.readfp(config_file)
  finally:
    config_file.close()
  url_path = url_config.get('url', 'url_path')

  # Are we a short URL? url_path is literal text, so escape it before using
  # it inside the pattern (otherwise its dots would match any character).
  short_match = re.match(r'.*%s(\d+)' % re.escape(url_path), url)
  if short_match:
    long_url = Expand(short_match.group(1))
    if long_url is None:
      return 'No such shortened URL!'
    return 'URL expanded: %s' % long_url

  # Only URLs longer than 30 characters are worth shortening.
  if len(url) <= 30:
    return None
  short_url = Shorten(url)
  if not short_url:
    return 'Unable to open URL: %s' % url
  return 'URL shortened: %s%s' % (url_path, short_url)


def GetPageTitle(url):
  """Get the <title> of the submitted web page

  Only the first 10240 bytes of the response are read and searched.

  Args:
    url: str or None
  Returns:
    title_text: str with runs of whitespace collapsed to single spaces;
      'No title' when the title text contains '403' or '404'; None when url
      is empty/None, the page cannot be fetched, or no non-empty title is
      found.
  """
  if not url:
    return None
  try:
    page = urllib.urlopen(url)
  except IOError:
    return None
  try:
    html = page.read(10240)
  except IOError:
    return None
  finally:
    # Always release the connection, including when the read fails.
    page.close()

  # First <title> element in the buffer; attributes on the tag are allowed.
  title_match = re.search(r'<title\b[^>]*>(.*?)</title>', html,
                          re.DOTALL | re.IGNORECASE)
  if not title_match:
    return None
  title_text = ' '.join(title_match.group(1).split())
  if not title_text:
    return None
  # Titles mentioning these status codes are treated as error pages.
  if '403' in title_text or '404' in title_text:
    return 'No title'
  return title_text

def main():
  """Command-line entry point: run Gather on the first two arguments.

  sys.argv[1] is passed as the keyword and sys.argv[2] as the message.
  Exits with status 1 when either one is missing.
  """
  if len(sys.argv) < 3:
    sys.exit(1)
  keyword, argument = sys.argv[1], sys.argv[2]
  Gather(keyword, argument)


if __name__ == '__main__':
  main()

