ó
ˆ~ÌQc           @   sD   d  d l  m Z d  d l Z d „  Z d „  Z d „  Z d „  Z d S(   iÿÿÿÿ(   t   escapeNc   	      C   sÉ   t  t |  ƒ ƒ } t | ƒ } d } d } d |  d } d } xf | D]^ } | t | | | !ƒ 7} | | 7} | j d | ƒ d } | t | | | !ƒ 7} | | 7} qE W| t | | ƒ 7} | d 7} | S(   Ns   <a id="broken_link">s   </a>sê   
						<!DOCTYPE html>
						<html>
						<head>
							<link rel="stylesheet" type="text/css" href="/html/css/seesource.css">
						</head>
						<body>
							<div id="head_info">
								<p class="h1">The source code for the page: sw   .</p>
								<p class="h2">Broken Links are highlighted red. <p>
							</div>
						</br>
						<div id="content">
		i    i   s4   
							</div>
							</body>
							</html>
							(   t   clear_sourcet   get_source_codet   get_positionR    t   find(	   t   urlt   pointerst   sourcet	   positionst	   start_csst   end_csst   contentt   pointert   p(    (    sV   /vol/project/2012/wmproject2013/chandra/web-project/django-server/crawler/functions.pyt   get_code_to_display   s"    	
c         C   s   t  j |  d i d d 6ƒ} y3 t  j | d d ƒ} | j ƒ  } | j ƒ  | SWn+ t  j k
 r| } d | j | j f GHn Xd S(   Nt   headerss   */*t   Acceptt   timeoutix   s   HTTP Error : %s : %st   Error(   t   urllib2t   Requestt   urlopent   readt   closet	   HTTPErrort   codet   reason(   R   t   reqt   usockR   t   httperr(    (    sV   /vol/project/2012/wmproject2013/chandra/web-project/django-server/crawler/functions.pyR   )   s    
c         C   s]   xV |  j  d ƒ d k rX |  j  d ƒ } |  j  d | ƒ } |  |  |  | t d ƒ }  q W|  S(   sô    Removes certain code from a HTML source
  
  Given a HTML source, it clear certain code that is of no interest, i.e.
  comments, and returns that string. Can be extended to exclude any code that 
  should not be parsed for URLs, as desired.
  s   <!--iÿÿÿÿs   -->(   R   t   len(   R   t   post   epos(    (    sV   /vol/project/2012/wmproject2013/chandra/web-project/django-server/crawler/functions.pyR   4   s
     c         C   s^   |  j  d ƒ } | j ƒ  g  } x. | D]& } | j t | j  d ƒ d ƒ ƒ q& W| j ƒ  | S(   Nt   |t   ;i   (   t   splitt   popt   appendt   intt   sort(   R   R   R   t   u(    (    sV   /vol/project/2012/wmproject2013/chandra/web-project/django-server/crawler/functions.pyR   D   s    
$
(   t   django.utils.htmlR    R   R   R   R   R   (    (    (    sV   /vol/project/2012/wmproject2013/chandra/web-project/django-server/crawler/functions.pyt   <module>   s
   	$		