ó
 Rc           @   s   d  d l  Z  d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l m Z d  d l m Z d Z d   Z	 d   Z
 d   Z d   Z d	   Z d S(
   i˙˙˙˙N(   t   datetime(   t   MIMETextt   #c         C   s§  i  } t  |  d  } x| D]y} | j t  r7 q n  | j t  d k rb | | j t   } n  | j d  d k rľ d | k r d | j   GHn  | j d  d j   | d <q | j d  } | j d  d j   } xľ | d k r| j d | d  d k r2| | d | j d | d  !j   } n | | d j   } | j d  d k rq| | j d   } n  | | | <| j d | d  } qŕ Wq W| j   | S(	   s`   Generates a dictionary of <URL:owner> from the config file
  
  Reads in the config file line by line, according to the syntax 
  (<e-mail>:<name>[|URL]), and stores each URL as a key, with the 
  corresponding owner as its value, in order to be able to quickly identify 
  which person is in charge of which URL when reporting feedback via e-mail.
  t   ri˙˙˙˙t   |t   defaults:   Warning! Duplicate default owners detected. Setting to %s.t   :i    i   (   t   opent
   startswitht   comment_signt   findt   stript   splitt   close(   t   filenamet   ownerst   own_filet   linet   post   ownert   url(    (    sE   /vol/project/2012/wmproject2013/chandra/web-project/Crawler/owners.pyt
   gen_owners   s.     *

c   	      C   sX  i  } t  |  d  } x2| D]*} | j t  } | d k r\ | d k rO q n  | |  } n  | j d  } | d k r | j   } t   | | <q | |  j   } t   } x | d k r;| j d | d  d k r| | d | j d | d  !j   } n | | d j   } | j |  | j d | d  } qŻ W| | | <q W| j   | S(   s@   Generates a dictionary of <owner:[URLs]> from the config filename
  
  Mirrored result of the previous function, was added more for an easier way
  to read in information for the owner editing software. Can be moved into a
  separate file to completely separate the crawler and the editor's code bases
  if required.
  R   i˙˙˙˙i    R   i   (   R   R
   R	   R   t   sett   addR   (	   R   t   urlsR   R   t   cposR   R   t   ownedurlR   (    (    sE   /vol/project/2012/wmproject2013/chandra/web-project/Crawler/owners.pyt   gen_urls8   s.    	*
c         C   s˙   |  } |  j  d  d } y t j d | g  } Wn t k
 rG } n X| j d  } | d k r | | t d  | j d |  !j   } nl t t j	  } xP | D]H } |  | k rĽ d | k ré | | j d  d | j d	  !} n  PqĽ qĽ W| j
   | S(
   s   Given the login of an owner, returns the name associated or login
  
  Searches to file containing the definition of owners for the login provided
  and then checks if a name was given: if it was, it returns that name; if not,
  it returns the given login 
  t   @i    t   fingers   Name:i˙˙˙˙s   
R   i   R   (   R   t
   subprocesst   check_outputt   OSErrorR
   t   lenR   R   t   settingst   OWNER_CONFIG_FILENAMER   (   R   t   own_namet   logint
   finger_rest   oset   nameposR   R   (    (    sE   /vol/project/2012/wmproject2013/chandra/web-project/Crawler/owners.pyt   get_own_nameZ   s"    ,&
c         C   su  i  } i  } t  j |   }  t t j  } xč |  j   D]Ú } | d } d | k rŠ | j d  d } | j d |  } | d k r | | d } qý | | | !d } nT d }	 xK | j   D]= }
 |
 | k rí t |
  |	 k rí | |
 } n  t |
  }	 qź W| | | <d | | <q7 W| j   x2 t	 |  j    D] }
 | | |
 c d	 |
 7<q2Wxt
 | j    D]} t |  } d
 | t j | j d  d f | | | | <t | |  } d t j t j    j   t j t j   j d d   j   f | d <t j | d <| | | d <y) t j t j  } d | GH| j   Wqgt j k
 rl} | GHd | GHqgXqgWd S(   s)   Sends appropriate e-mails given the list of broken links found
  
  Given the list of broken links, as well as a dictionary of <URL:owner>, 
  builds up a message of the necessary feedback to send to the e-mail address
  associated with a particular URL, notifying of the broken links present.
  R   t   ~i   t   /i˙˙˙˙s   @imperial.ac.uki    sB   The following pages have been identified to contain broken links:
s   	%s
s   Dear %s,

Please find mocked versions of the pages in question at %s/%s, highlighting the broken links and take appropriate measures to fix them.

R   s'   [auto/crawler] Broken links on %s at %st   microsecondt   Subjectt   Fromt   Tos   Sent mail for %s.s   Error sending mail for %s.N(   t	   functionst   parse_errorsR   R"   R#   t   keysR
   R!   R   t   sortedR   t   valuesR)   t   MOCK_PAGE_LOCR   R   R    t   datet   nowt	   isoformatt   timet   replacet	   SMTP_FROMt   smtplibt   SMTPt   SMTP_SERVERt   quitt   SMTPException(   t	   err_linkst   ownurlst
   err_ownerst   msgR   t   lnkR   t   tpost   epost   curr_lenR   R$   t   mailt   st   smtpe(    (    sE   /vol/project/2012/wmproject2013/chandra/web-project/Crawler/owners.pyt   check_ownerst   sH    


/J	c   
      C   s  i  } x|  j    D]ó \ } } | d } d | k r | j d  d } | j d |  } | d k rw | | d } qß | | | !d } nT d } xK | j   D]= }	 |	 | k rĎ t |	  | k rĎ | |	 } n  t |	  } q W| | k rř i  | | <n  | | | | <q W| S(   NR   R*   i   R+   i˙˙˙˙s   @imperial.ac.uki    (   t	   iteritemsR
   R2   R!   (
   t   error_linksR   t   owners_and_pagesR   t   errsR   RF   RG   RH   t   ourl(    (    sE   /vol/project/2012/wmproject2013/chandra/web-project/Crawler/owners.pyt	   get_ownerŤ   s&    
	(   t   httplibR<   R    R"   R0   R   t   email.mime.textR   R	   R   R   R)   RL   RR   (    (    (    sE   /vol/project/2012/wmproject2013/chandra/web-project/Crawler/owners.pyt   <module>   s   		"	"		7