#!/bin/bash
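# This script accepts up to two optional arguments: the first is the URL that
# serves as the starting point of the crawling process, and the second is the
# mode that determines how much information is offered as feedback.
# Passing DEBUG as the only argument runs the crawler under pdb with the
# default URL and mode.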
#
# The modes determine which errors are recorded as feedback:
#       * 1 - only 404 HTTP errors - pages that the server says don't exist
#       * 2 - only HTTP errors     - pages that the server cannot provide to
#                                  the crawler for various reasons, e.g.
#                                  authentication
#       * 3 - both HTTP and URL    - all pages that the crawler could not
#                                  access
# When no mode is given, this script passes DEFAULT_MODE (currently 1).
# These levels can be modified in settings.py.
# * Note that using modes 1 or 2 will most likely lead to some inconsistency
# between the anchors and the links, because an additional parsing step is
# done for some links in order to speed up the process and reduce network
# traffic.

# Launcher configuration: the Python entry point and the values used when the
# caller omits the URL and/or the feedback mode.
SCRIPT_NAME=crawler.py
DEFAULT_URL=www.doc.ic.ac.uk/
DEFAULT_MODE=1

# Assemble the command as an array so that every argument is passed to Python
# as exactly one word, even if it contains whitespace or glob characters.
#
# Argument handling:
#   (no $1)          -> default URL, default mode (any $2 is ignored)
#   $1 = DEBUG only  -> default URL, default mode, run under pdb
#   $1 only          -> given URL, default mode
#   $1 and $2        -> given URL, given mode
if [[ -z "$1" ]]; then
  crawler_cmd=(python "$SCRIPT_NAME" "$DEFAULT_URL" "$DEFAULT_MODE")
elif [[ -z "$2" ]]; then
  if [[ "$1" == "DEBUG" ]]; then
    crawler_cmd=(python -m pdb "$SCRIPT_NAME" "$DEFAULT_URL" "$DEFAULT_MODE")
  else
    crawler_cmd=(python "$SCRIPT_NAME" "$1" "$DEFAULT_MODE")
  fi
else
  crawler_cmd=(python "$SCRIPT_NAME" "$1" "$2")
fi

# Run the crawler last so that its exit status becomes the script's status.
"${crawler_cmd[@]}"
