from django.utils.html import escape
import urllib2


def get_code_to_display(url, pointers):
	"""Build an HTML page showing the source of a page with broken links highlighted.

	The page source is fetched with get_source_code(), stripped of HTML
	comments with clear_source(), HTML-escaped, and wrapped in a small
	HTML document. Each offset obtained from get_position(pointers) is
	treated as the start of a broken anchor: the text from that offset up
	to and including the next '</a>' is wrapped in a highlighting element.

	Args:
		url: Address of the page whose source is displayed. It is also
			shown (escaped) in the page header.
		pointers: Encoded positions string, decoded by get_position()
			into character offsets into the comment-stripped source.

	Returns:
		The complete HTML document as a string.
	"""
	source = clear_source(get_source_code(url))
	positions = get_position(pointers)
	# NOTE(review): the same id is emitted once per broken link, which is
	# not valid HTML; kept as-is because the stylesheet presumably targets
	# "#broken_link" -- confirm before switching to a class.
	start_css = '<a id="broken_link">'
	end_css = '</a>'
	closing_tag = '</a>'
	# The url is escaped so a crafted address cannot inject markup.
	parts = ["""
						<!DOCTYPE html>
						<html>
						<head>
							<link rel="stylesheet" type="text/css" href="/html/css/seesource.css">
						</head>
						<body>
							<div id="head_info">
								<p class="h1">The source code for the page: """ + escape(url) + """.</p>
								<p class="h2">Broken Links are highlighted red. <p>
							</div>
						</br>
						<div id="content">
		"""]
	pointer = 0
	for p in positions:
		if p < pointer:
			# This offset lies inside an anchor that was already emitted;
			# processing it again would duplicate part of the source.
			continue
		parts.append(escape(source[pointer:p]))
		close_at = source.find(closing_tag, p)
		if close_at == -1:
			# No closing tag after this offset: highlight to the end of
			# the source instead of jumping back to the beginning.
			pointer = len(source)
		else:
			pointer = close_at + len(closing_tag)
		parts.append(start_css)
		parts.append(escape(source[p:pointer]))
		parts.append(end_css)
	parts.append(escape(source[pointer:]))
	parts.append("""
							</div>
							</body>
							</html>
							""")
	return ''.join(parts)



def get_source_code(url):
	"""Download the page at url and return its source as text.

	The request is sent with an "Accept: */*" header and a 120 second
	timeout. The response body is decoded as UTF-8, ignoring bytes that
	cannot be decoded.

	Args:
		url: Address of the page to download.

	Returns:
		The decoded page source, or the string 'Error' when the server
		answers with an HTTP error status (the error is printed).

	Raises:
		Exceptions other than urllib2.HTTPError are not handled here and
		propagate to the caller.
	"""
	req = urllib2.Request(url, headers={"Accept": "*/*"})
	try:
		usock = urllib2.urlopen(req, timeout=120)
		try:
			source = usock.read()
		finally:
			# Always release the connection, even when read() fails.
			usock.close()
		return source.decode('utf-8', 'ignore')
	except urllib2.HTTPError as httperr:
		# Single parenthesized argument: prints the same text whether
		# print is a statement or a function.
		print("HTTP Error : %s : %s" % (httperr.code, httperr.reason))
	return 'Error'

def clear_source(source):
  ''' Removes certain code from a HTML source

  Given a HTML source, it clears code that is of no interest, i.e. HTML
  comments, and returns the resulting string. Can be extended to exclude
  any code that should not be parsed for URLs, as desired.

  Each comment is removed from its "<!--" up to and including the first
  "-->" that follows. If a "<!--" has no closing "-->", everything from
  that opener to the end of the source is dropped; this also guarantees
  the loop terminates on malformed input.
  '''
  opener = "<!--"
  closer = "-->"
  pos = source.find(opener)
  while pos != -1:
    epos = source.find(closer, pos)
    if epos == -1:
      # Unterminated comment: nothing after the opener is usable.
      return source[:pos]
    source = source[:pos] + source[epos + len(closer):]
    # Search again from the start: joining the two pieces can form a new
    # opener across the seam.
    pos = source.find(opener)
  return source

def get_position(pointers):
	"""Decode a pointers string into a sorted list of integer positions.

	The string is a sequence of entries separated by '|'. Each entry has
	fields separated by ';', and the second field is the position.
	Empty entries (such as the one produced by a trailing '|') are
	skipped, so the input is handled with or without a trailing separator.

	Args:
		pointers: Encoded string, e.g. "a;12|b;3|".

	Returns:
		The positions as a list of ints in ascending order.

	Raises:
		IndexError: If a non-empty entry has no ';' separated second field.
		ValueError: If the second field of an entry is not an integer.
	"""
	return sorted(
		int(entry.split(';')[1])
		for entry in pointers.split('|')
		if entry
	)