package dynamic;

import dynamic.sitebackup.*;

import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;

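/**
 * Command-line entry point that crawls a web site and saves every page it visits.
 * The crawl starts at {@code BASE_URL + SUB_URL}, follows the links reported by
 * {@code LinkAnalyser.getAllInternalLinks}, and hands each fetched page to
 * {@code PageSaver.savePage}.
 *
 * Created with IntelliJ IDEA.
 * User: John
 * Date: 25/04/12
 * Time: 11:17
 */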
public class WebBackup {

    /** URL of the page currently being processed; included in {@link #flagError(String)} output. */
    public static String CURRENTLY_ANALYSING = "";
    /** First part of the start URL; {@link #SUB_URL} is appended to it. */
    public static String BASE_URL = "http://www3.imperial.ac.uk";
    /** Second part of the start URL, appended to {@link #BASE_URL}. */
    public static String SUB_URL = "/computing";
    /** URLs that have already been processed, so that no page is fetched or saved twice. */
    static HashSet<String> analysed_pages = new HashSet<String>();

    /**
     * Starts a crawl at {@code BASE_URL + SUB_URL}, passing
     * {@code site_backup_<yyyyMMdd_HH_mm_ss>} (current local time) as the
     * backup directory name.
     *
     * @param args not used
     * @throws IOException propagated from the crawl
     */
    public static void main(String[] args) throws IOException {

        DateFormat df = new SimpleDateFormat("yyyyMMdd_HH_mm_ss");
        String timestamp = df.format(new Date());

        String directory_name = "site_backup_" + timestamp;
        savePageAndSubPages(BASE_URL + SUB_URL, directory_name);

        // Zipping of the finished backup is currently disabled:
//        DirectoryZipper.zipDirectory(directory_name);

    }

    /**
     * Fetches and saves {@code url} and then, depth-first, every page
     * reachable from it through the links returned by
     * {@code LinkAnalyser.getAllInternalLinks}. Each URL is processed at most
     * once per JVM run (tracked in {@link #analysed_pages}).
     *
     * <p>The traversal uses an explicit work list instead of recursion: a
     * recursive crawl needs one stack frame per page on the current link
     * chain, which can overflow the thread stack on a large site. Pages are
     * still visited in the same order a recursive depth-first walk would use.
     *
     * @param url       the page to start from
     * @param directory backup directory name handed to {@code PageSaver.savePage}
     * @throws IOException propagated from fetching or saving a page; the crawl
     *                     stops at the first failure
     */
    private static void savePageAndSubPages(String url, String directory) throws IOException {

        // Used as a LIFO stack: the last element is the next page to visit.
        ArrayList<String> pending = new ArrayList<String>();
        pending.add(url);

        while (!pending.isEmpty()) {
            String current = pending.remove(pending.size() - 1);

            // add() returns false when the URL was already recorded, i.e. the
            // page has been handled before and must be skipped.
            if (!analysed_pages.add(current)) {
                continue;
            }
            System.out.println(current);
            CURRENTLY_ANALYSING = current;

            String contents = HTMLGrabber.getContents(current);
            if (contents.contains("computingnew")) {
                flagError("\"computingnew\" spotted - " + current);
            }

            PageSaver.savePage(directory, current, contents);

            ArrayList<String> links = LinkAnalyser.getAllInternalLinks(contents);

            // Push in reverse so the first link ends up on top of the stack
            // and is followed first, exactly as a recursive walk would do.
            for (int i = links.size() - 1; i >= 0; i--) {
                String link = links.get(i);
                // Already-processed links would be skipped when popped anyway;
                // leaving them out just keeps the work list small.
                if (!analysed_pages.contains(link)) {
                    pending.add(link);
                }
            }
        }

    }

    /**
     * Prints a banner to standard error containing the given message and the
     * URL stored in {@link #CURRENTLY_ANALYSING}.
     *
     * @param s the error message to report
     */
    public static void flagError(String s) {
        String border = "************************************************************";
        System.err.println(border);
        System.err.println("** " + s);
        System.err.println("** analysing: " + CURRENTLY_ANALYSING);
        System.err.println(border);
    }
}
