import httplib
import smtplib
import datetime
import settings
import functions
import subprocess
from datetime import datetime
from email.mime.text import MIMEText

"""
This module contains some functions that have to do with handling owners on
the crawler's side, namely porting the information contained within the config
file into data to be used as required, and sending e-mails with the gathered
information to the interested parties.
"""

# Symbol that starts a comment in the owner config file. The config parsers in
# this module ignore everything from this symbol to the end of the line.
comment_sign = "#"


def gen_owners (filename):
  ''' Generates a dictionary of <URL:owner> from the config file

  Reads in the config file line by line, according to the syntax
  (<e-mail>[:<name>][|URL]...), and stores each URL as a key, with the e-mail
  of the corresponding owner as its value, in order to be able to quickly
  identify which person is in charge of which URL when reporting feedback via
  e-mail. A line that lists no URL defines the owner stored under the special
  key "default"; if several such lines exist a warning is printed and the
  last one wins.

  Everything from the comment delimiter to the end of a line is ignored, and
  lines that are empty once comments and whitespace are removed are skipped.

  Parameters:
    filename -- path of the owner config file to read

  Returns:
    A dictionary mapping each URL (plus "default", when defined) to the
    e-mail address of its owner.
  '''
  owners = {}
  # The context manager closes the file even if parsing raises.
  with open(filename, 'r') as own_file:
    for line in own_file:
      # Drop the trailing comment (if any) and the surrounding whitespace.
      line = line.split(comment_sign, 1)[0].strip()
      if not line:
        # Blank and comment-only lines must not be mistaken for a line
        # defining the default owner.
        continue
      fields = line.split("|")
      # Only the part before the first "|" describes the owner; looking for
      # ":" in the whole line would hit the "http:" of the first URL whenever
      # no name is given.
      owner = fields[0].split(":")[0].strip()
      if len(fields) == 1:
        if "default" in owners:
          print("Warning! Duplicate default owners detected. Setting to %s." % (owner))
        owners["default"] = owner
        continue
      for url in fields[1:]:
        url = url.strip()
        if url:
          # Empty fields (e.g. after a trailing "|") are not URLs.
          owners[url] = owner
  return owners
  
def gen_urls (filename):
  ''' Generates a dictionary of <owner:[URLs]> from the config filename

  Mirrored result of gen_owners, added as an easier way to read in
  information for the owner editing software. Can be moved into a separate
  file to completely separate the crawler and the editor's code bases if
  required.

  The key is the complete owner field of a line (everything before the first
  "|", i.e. "<e-mail>" or "<e-mail>:<name>"), and the value is the set of URLs
  listed on that line. An owner without URLs maps to an empty set. If an
  owner field appears on several lines, the last line wins.

  Everything from the comment delimiter to the end of a line is ignored, and
  lines that are empty once comments and whitespace are removed are skipped.

  Parameters:
    filename -- path of the owner config file to read

  Returns:
    A dictionary mapping each owner field to a set of URL strings.
  '''
  urls = {}
  # The context manager closes the file even if parsing raises.
  with open(filename, 'r') as own_file:
    for line in own_file:
      # Drop the trailing comment (if any) and the surrounding whitespace.
      line = line.split(comment_sign, 1)[0].strip()
      if not line:
        # A blank line would otherwise register an owner named "".
        continue
      fields = [field.strip() for field in line.split("|")]
      # Empty fields (e.g. after a trailing "|") are not URLs.
      urls[fields[0]] = set(url for url in fields[1:] if url)
  return urls
  
def get_own_name (owner):
  ''' Given the login or e-mail of an owner, returns the associated name

  First asks the system's "finger" command about the login (the part of owner
  before "@") and uses the "Name:" field of its output. If finger is not
  available, fails, or reports no name, searches the owner config file
  (settings.OWNER_CONFIG_FILENAME) for the first non-comment line containing
  owner and uses the <name> part of its "<e-mail>:<name>" field.

  Parameters:
    owner -- login or e-mail address of the owner

  Returns:
    The name found, or owner itself when no name could be determined.
  '''
  login = owner.split("@")[0]
  finger_res = ""
  try:
    finger_res = subprocess.check_output(["finger", login])
  except (OSError, subprocess.CalledProcessError):
    # finger is not installed or exited with an error: keep the empty result
    # so that the config file is consulted below.
    pass
  if not isinstance(finger_res, str):
    # check_output returns bytes on Python 3; this is a no-op on Python 2.
    finger_res = finger_res.decode("utf-8", "replace")

  namepos = finger_res.find("Name:")
  if namepos != -1:
    endpos = finger_res.find("\n", namepos)
    if endpos == -1:
      # "Name:" sits on the last, unterminated line of the output.
      endpos = len(finger_res)
    finger_name = finger_res[namepos+len("Name:"):endpos].strip()
    if finger_name:
      return finger_name

  own_name = owner
  with open(settings.OWNER_CONFIG_FILENAME) as own_file:
    for line in own_file:
      # Ignore comments, as the config parsers of this module do.
      line = line.split(comment_sign, 1)[0]
      if owner not in line:
        continue
      # The name can only live before the first "|"; a ":" found later
      # belongs to a URL ("http://...").
      owner_field = line.split("|", 1)[0]
      if ":" in owner_field:
        cfg_name = owner_field.split(":", 1)[1].strip()
        if cfg_name:
          own_name = cfg_name
      break
  return own_name
  
def _owner_for_link (lnk, ownurls):
  ''' Returns the e-mail of the owner responsible for the page lnk

  Links to a personal page ("~login") belong to login@imperial.ac.uk. Any
  other link belongs to the owner of the longest URL of ownurls contained in
  it, or to ownurls["default"] when no URL matches.
  '''
  # Looked up first so that a config without default owner fails the same way
  # for every kind of link.
  owner = ownurls["default"]
  tpos = lnk.find("~")
  if tpos != -1:
    tpos += 1
    epos = lnk.find("/", tpos)
    if epos == -1:
      epos = len(lnk)
    return lnk[tpos:epos] + "@imperial.ac.uk"
  best_len = 0
  for url in ownurls:
    if url == "default":
      # Special key holding the fallback owner, not a URL to match against.
      continue
    if url in lnk and len(url) > best_len:
      owner = ownurls[url]
      # Only a URL that actually matched may raise the bar for later ones.
      best_len = len(url)
  return owner

def check_owners (err_links, ownurls):
  ''' Sends appropriate e-mails given the list of broken links found

  Given the list of broken links, as well as a dictionary of <URL:owner>,
  builds up a message of the necessary feedback to send to the e-mail address
  associated with a particular URL, notifying of the broken links present.
  One e-mail is sent per owner, listing that owner's pages in sorted order.
  The outcome of every send attempt is printed; SMTP errors are reported and
  do not stop the remaining e-mails from being sent.

  Parameters:
    err_links -- broken link data, passed through functions.parse_errors; the
                 keys of the parsed result are used as the URLs of the pages
                 to report
    ownurls   -- dictionary of <URL:owner e-mail>, containing a "default" key

  Raises:
    KeyError if ownurls has no "default" entry and there are pages to report.
  '''
  err_links = functions.parse_errors(err_links)

  # Group the affected pages by owner, each list in sorted order.
  pages = {}
  for lnk in sorted(err_links):
    pages.setdefault(_owner_for_link(lnk, ownurls), []).append(lnk)

  for owner in sorted(pages):
    own_name = get_own_name(owner)
    report_link = "http://project10.doc.ic.ac.uk:8000/displaypage/?uname=%s" % (owner.split("@")[0])
    extra_mail = "For this run, links that contain a question mark sign are ignored due to the issue mentioned with false positives in the e-mail sent on September 3rd. Therefore, any links indicated as broken should be so."
    body = "Dear %s,\n\nPlease find mocked versions of the pages in question at %s, highlighting the broken links and take appropriate measures to fix them. [Link currently working only if connected to the Imperial network]\n\n%s\n\nFor any issues encountered or feedback, please reply to this e-mail.\n\n" % (own_name, report_link, extra_mail)
    body += "The following pages have been identified to contain broken links:\n"
    body += "".join(["\t%s\n" % (url) for url in pages[owner]])

    # A single timestamp, so date and time cannot disagree around midnight.
    now = datetime.now().replace(microsecond=0)
    mail = MIMEText(body)
    mail['Subject'] = "[auto/crawler] Broken links on %s at %s" % (now.date().isoformat(), now.time().isoformat())
    mail['From'] = settings.SMTP_FROM
    mail['To'] = owner

    try:
      s = smtplib.SMTP(settings.SMTP_SERVER)
      try:
        s.sendmail(settings.SMTP_FROM, owner, mail.as_string())
      finally:
        # Close the connection even when sending failed.
        s.quit()
      print("Sent mail for %s." % (owner))
    except smtplib.SMTPException as smtpe:
      print(smtpe)
      print("Error sending mail for %s." % (owner))

def get_owner(error_links, owners):
  ''' Groups the pages containing broken links by the owner in charge

  Links to a personal page ("~login") belong to login@imperial.ac.uk. Any
  other page belongs to the owner of the longest URL of owners contained in
  its URL, or to owners["default"] when no URL matches.

  Parameters:
    error_links -- dictionary of <page URL:errors found on that page>
    owners      -- dictionary of <URL:owner e-mail>, containing a "default"
                   key

  Returns:
    A dictionary of <owner e-mail:{page URL:errors}>.

  Raises:
    KeyError if owners has no "default" entry and error_links is not empty.
  '''
  owners_and_pages = {}
  for (url, errs) in error_links.items():
    owner = owners["default"]
    tpos = url.find("~")
    if tpos != -1:
      tpos += 1
      epos = url.find("/", tpos)
      if epos == -1:
        epos = len(url)
      owner = url[tpos:epos] + "@imperial.ac.uk"
    else:
      best_len = 0
      for ourl in owners:
        if ourl == "default":
          # Special key holding the fallback owner, not a URL to match.
          continue
        if ourl in url and len(ourl) > best_len:
          owner = owners[ourl]
          # Only a URL that actually matched may raise the bar for later
          # ones; otherwise the result depends on the dictionary order.
          best_len = len(ourl)
    owners_and_pages.setdefault(owner, {})[url] = errs
  return owners_and_pages
