/*
 *	dbparse.c:
 *	parses, as best it can, a Netscape-style database file.
 *	(Historically one recognized such files by Netscape giving them
 *	the extension ".db", although version 4.6 for Linux seems to use
 *	"history.dat" as its history database file,
 *	despite "history.db" being specified in the preferences.)
 *	Writes to the standard output.
 *
 *	usage: dbparse [--html] [--notbefore <time-as-string>] <db-file-name>
 *
 *	Only entries with timestamps between the specified "not before" time
 *	(or the Unix big bang if not specified) and the current time inclusive
 *	are displayed. Format expected for the "not before" time string
 *	is like this: "Tue Sep 22 19:12:44 1998".
 *
 *	Output format *without* --html flag is one entry per line, like this:
 *	numeric-timestamp [TAB] timestamp-as-string [TAB]
 *	db-file-entry-URL [TAB] db-file-entry-title [NEWLINE].
 *
 *	Output format *with* --html flag is one entry per line, like this:
 *	numeric-timestamp [TAB]
 *	<LI> <A HREF="[db-file-entry-URL]">
 *		[db-file-entry-title] <I>([db-file-entry-URL])</I>
 *	     </A>
 *	     <I> - [timestamp-as-string]</I>
 *	</LI> [NEWLINE].
 *
 *	In both cases an empty title is rendered as "Untitled".
 *
 *	In the --html format, '"' characters in the first displaying of the URL
 *	(the giving of it as an anchor) are rendered as "%22";
 *	and similarly, "<" and ">" characters in the title and the
 *	subsequent (second) displaying of the URL are rendered as
 *	"&lt;" and "&gt;" respectively. With these renderings we avoid unwanted
 *	"HTML meta-character interpretations" by browsers, which could
 *	potentially mess up the display of the rest of the page.
 *
 *
 *	NOTE: This source code is not very polished.
 */

/* Expose strptime() from <time.h>; must be defined before any system header. */
#define _XOPEN_SOURCE 700

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/time.h>

/* Minimal boolean type and truth values used for the flag variables in main(). */
#define bool	int
#define TRUE	1
#define FALSE	0

/*
 * Maximum number of characters kept for an entry's title and URL;
 * the buffers in main() are declared one byte larger than this.
 */
#define MAX_DB_FILE_ENTRY_TITLE_LENGTH	65536
#define MAX_DB_FILE_ENTRY_URL_LENGTH	65536


/*
 *	usage:
 *	writes the command-line synopsis to the standard error.
 */
static void usage(const char *program_name)
{
	fprintf(stderr, "usage: %s [--html] [--notbefore <time-as-string>] <db-file-name>\n", program_name);
}

/*
 *	is_flag:
 *	returns nonzero if the argument is spelled exactly like one of
 *	the option flags (such a word is never accepted as a file name
 *	or as a "not before" time string).
 */
static int is_flag(const char *argument)
{
	return strcmp(argument, "--html") == 0 || strcmp(argument, "--notbefore") == 0;
}

/*
 *	read_printable_field:
 *	starting with the already-read character "ch", consumes
 *	characters from the file up to and including the next null
 *	character (or up to end of file). Characters with values from
 *	space to 127 inclusive are stored in "buffer" until "max_length"
 *	of them have been kept; everything else is dropped.
 *	"buffer" must have room for max_length+1 characters and is
 *	always null-terminated.
 *	Returns the character that ended the field ('\0' or EOF).
 */
static int read_printable_field(FILE *filehandle, int ch, char *buffer, int max_length)
{
	int position=0;

	while (ch != '\0' && ch != EOF) {
		if (position < max_length && ch >= ' ' && ch <= 127) {
			buffer[position++]=(char) ch;
		}
		ch=getc(filehandle);
	}
	buffer[position]='\0';

	return ch;
}

/*
 *	print_href_value:
 *	writes a URL for use inside HREF="...",
 *	rendering '"' characters as "%22".
 */
static void print_href_value(const char *url)
{
	for (; *url != '\0'; url++) {
		if (*url == '"') {
			printf("%%22");
		} else {
			putchar(*url);
		}
	}
}

/*
 *	print_html_text:
 *	writes text for display in an HTML page,
 *	rendering '<' and '>' as "&lt;" and "&gt;" respectively.
 */
static void print_html_text(const char *text)
{
	for (; *text != '\0'; text++) {
		switch (*text) {
		case '<' :
			printf("&lt;");
			break;
		case '>' :
			printf("&gt;");
			break;
		default :
			putchar(*text);
			break;
		}
	}
}

/*
 *	print_time_as_string:
 *	writes the ctime() rendering of the timestamp without its
 *	trailing newline. Writes nothing if ctime() cannot render it.
 */
static void print_time_as_string(time_t timestamp)
{
	const char *i=ctime(&timestamp);

	if (i == NULL) {
		return;
	}
	for (; *i != '\n' && *i != '\0'; i++) {
		putchar(*i);
	}
}

/*
 *	main:
 *	parses the command line, opens the database file and scans it
 *	for entries, writing one line per accepted entry to the
 *	standard output (plain or HTML format).
 *
 *	Accepted command lines (exactly these four shapes):
 *		<db-file-name>
 *		--html <db-file-name>
 *		--notbefore <time-as-string> <db-file-name>
 *		--html --notbefore <time-as-string> <db-file-name>
 *
 *	Returns EXIT_SUCCESS after scanning the whole file, or
 *	EXIT_FAILURE after a usage error, an unreadable file, an
 *	uninterpretable "not before" time or an unavailable clock.
 */
int main(int argc, char *argv[])
{
	int ch, count, argument_index;
	int timestamp_bytes[4];
	unsigned long raw_timestamp;
	time_t db_file_entry_timestamp, not_before_time, current_time;
	struct tm not_before_time_as_struct_tm;
	const char *not_before_time_as_string=NULL, *filename;
	/* static: keeps 128 KiB of buffers off the stack */
	static char db_file_entry_title[MAX_DB_FILE_ENTRY_TITLE_LENGTH+1];
	static char db_file_entry_URL  [MAX_DB_FILE_ENTRY_URL_LENGTH  +1];
	bool output_as_html=FALSE, use_not_before_time_given_as_argument=FALSE;
	FILE *filehandle;


	/*
	 *	argument checking:
	 *	optional --html, then optional --notbefore <time>,
	 *	then exactly one file name.
	 */
	argument_index=1;

	if (argument_index < argc && strcmp(argv[argument_index], "--html") == 0) {
		output_as_html=TRUE;
		argument_index++;
	}

	if (argument_index < argc && strcmp(argv[argument_index], "--notbefore") == 0) {
		argument_index++;
		if (argument_index >= argc || is_flag(argv[argument_index])) {
			usage(argv[0]);
			return EXIT_FAILURE;
		}
		use_not_before_time_given_as_argument=TRUE;
		not_before_time_as_string=argv[argument_index++];
	}

	if (argument_index != argc-1 || is_flag(argv[argument_index])) {
		usage(argv[0]);
		return EXIT_FAILURE;
	}
	filename=argv[argument_index];


	/*
	 *	do we have a readable file?
	 *	(binary mode: the file is not text, and must not undergo
	 *	newline or end-of-file-character translation)
	 */
	filehandle=fopen(filename, "rb");
	if (filehandle == NULL) {
		fprintf(stderr, "%s: cannot open file \"%s\" for reading!\n", argv[0], filename);
		return EXIT_FAILURE;
	}


	/*
	 *	compute the "not before" time
	 *	(for rejecting entries whose timestamps are ostensibly
	 *	earlier than this)
	 */
	if (use_not_before_time_given_as_argument) {
		/*
		 *	strptime() fills in only the fields it parses,
		 *	so start from a known state; tm_isdst=-1 asks
		 *	mktime() to work out daylight saving itself.
		 */
		memset(&not_before_time_as_struct_tm, 0, sizeof not_before_time_as_struct_tm);
		not_before_time_as_struct_tm.tm_isdst=-1;

		if (strptime(not_before_time_as_string, "%a %b %d %H:%M:%S %Y", &not_before_time_as_struct_tm) == NULL) {
			not_before_time=(time_t) -1;
		} else {
			not_before_time=mktime(&not_before_time_as_struct_tm);
		}
		if (not_before_time == (time_t) -1) {
			fprintf(stderr, "%s: cannot interpret \"not before\" time \"%s\"!\n", argv[0], not_before_time_as_string);
			fclose(filehandle);
			return EXIT_FAILURE;
		}
	} else {
		not_before_time=0;
	}


	/*
	 *	get the current time
	 *	(for rejecting entries whose timestamps are ostensibly
	 *	in the future)
	 */
	current_time=time(NULL);
	if (current_time == (time_t) -1) {
		fprintf(stderr, "%s: cannot determine current time!\n", argv[0]);
		fclose(filehandle);
		return EXIT_FAILURE;
	}


	/*
	 *	OK, now the parsing proper!
	 */
	ch=getc(filehandle);

	while (ch != EOF) {
		/*
		 *	find a null character if there is one
		 */
		while (ch != EOF && ch != '\0') {
			ch=getc(filehandle);
		}
		if (ch == EOF) {
			break;
		}

		/*
		 *	see if the next 4 bytes (least significant first)
		 *	are a "sensible" entry-timestamp
		 *	(defined as between the "not before" time
		 *	and the current time inclusive)
		 */
		for (count=0; count<4 && ch != EOF; count++) {
			ch=getc(filehandle);
			timestamp_bytes[count]=ch;
		}
		if (ch == EOF) {
			/* file ended inside the timestamp: nothing more to find */
			break;
		}

		/* unsigned arithmetic: a top byte >= 128 must not overflow */
		raw_timestamp=(unsigned long) timestamp_bytes[0]
			| ((unsigned long) timestamp_bytes[1] << 8)
			| ((unsigned long) timestamp_bytes[2] << 16)
			| ((unsigned long) timestamp_bytes[3] << 24);
		db_file_entry_timestamp=(time_t) raw_timestamp;

		if (db_file_entry_timestamp < not_before_time || db_file_entry_timestamp > current_time) {
			/*
			 *	not an entry; carry on scanning from the
			 *	last byte read (which may itself be a null)
			 */
			continue;
		}

		/*
		 *	read a further 12 bytes,
		 *	which seem to be meant to be (4 bytes each):
		 *	another time-valued thing,
		 *	a counter of some sort,
		 *	another counter of some sort.(!)
		 *	But here we just zip past them all.
		 */
		for (count=0; count<12; count++) {
			ch=getc(filehandle);
		}

		/*
		 *	collect up what we hope is a db-file-entry title
		 *	part; an empty title becomes "Untitled".
		 */
		ch=read_printable_field(filehandle, getc(filehandle), db_file_entry_title, MAX_DB_FILE_ENTRY_TITLE_LENGTH);
		if (db_file_entry_title[0] == '\0') {
			strcpy(db_file_entry_title, "Untitled");
		}

		/*
		 *	see if we then have a "sensible"
		 *	db-file-entry URL part.
		 *	(defined as starting with an
		 *	alphanumeric character:
		 *	OK, not an ideal definition!)
		 */
		ch=getc(filehandle);
		if (!isalnum(ch)) {
			continue;
		}
		ch=read_printable_field(filehandle, ch, db_file_entry_URL, MAX_DB_FILE_ENTRY_URL_LENGTH);

		/*
		 *	OK, we're now ready to print out the current line.
		 *	(time_t is printed via long: its width is not
		 *	necessarily that of int)
		 */
		if (output_as_html) {
			printf("%ld\t<LI> <A HREF=\"", (long) db_file_entry_timestamp);
			print_href_value(db_file_entry_URL);
			printf("\">");
			print_html_text(db_file_entry_title);
			printf(" <I>(");
			print_html_text(db_file_entry_URL);
			printf(")</I></A> <I> - ");
			print_time_as_string(db_file_entry_timestamp);
			printf("</I> </LI>\n");
		} else {
			printf("%ld\t", (long) db_file_entry_timestamp);
			print_time_as_string(db_file_entry_timestamp);
			printf("\t%s\t%s\n", db_file_entry_URL, db_file_entry_title);
		}
	}

	fclose(filehandle);

	return EXIT_SUCCESS;
}
