/*
 * dbparse.c:
 *   parses, as best it can, a Netscape-style database file.
 *   (Historically one recognized such files by Netscape giving them
 *   the extension ".db", although version 4.6 for Linux seems to use
 *   "history.dat" as its history database file,
 *   despite "history.db" being specified in the preferences.)
 *   Writes to the standard output.
 *
 * usage: dbparse [--html] [--notbefore <time>] <file>
 *
 * Only entries with timestamps between the specified "not before" time
 * (or the Unix big bang if not specified) and the current time inclusive
 * are displayed.  Format expected for the "not before" time string
 * is like this: "Tue Sep 22 19:12:44 1998".
 *
 * Output format *without* --html flag is one entry per line, like this:
 *   numeric-timestamp [TAB] timestamp-as-string [TAB]
 *   db-file-entry-URL [TAB] db-file-entry-title [NEWLINE].
 *
 * Output format *with* --html flag is one entry per line, like this
 * (shown here on several lines only for readability):
 *   numeric-timestamp [TAB]
 *   <p>
 *   <a href="[db-file-entry-URL]">[db-file-entry-title]</a>
 *    ([db-file-entry-URL]) - [timestamp-as-string]
 *   </p>
 *   [NEWLINE].
 *
 * NOTE(review): the block-level element wrapping each HTML entry was lost
 * from the copy of the source this was restored from; <p>...</p> is an
 * assumption -- confirm against real output if exact markup matters.
 *
 * In both cases an empty title is rendered as "Untitled".
 *
 * In the --html format, '"' characters in the first displaying of the URL
 * (the giving of it as an anchor) are rendered as "%22";
 * and similarly, "<" and ">" characters in the title and the
 * subsequent (second) displaying of the URL are rendered as
 * "&lt;" and "&gt;" respectively.  With these renderings we avoid unwanted
 * "HTML meta-character interpretations" by browsers, which could
 * potentially mess up the display of the rest of the page.
 *
 *
 * NOTE: This source code is not very polished.
 */

/* strptime() is an X/Open extension; ask for it before any system header. */
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 700
#endif

#include <sys/types.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define bool int
#define TRUE 1
#define FALSE 0

#define MAX_DB_FILE_ENTRY_TITLE_LENGTH 65536
#define MAX_DB_FILE_ENTRY_URL_LENGTH   65536

/*
 * Print the usage message to stderr and terminate with exit status 1.
 * Never returns.
 */
static void usage(const char *program_name)
{
    fprintf(stderr, "usage: %s [--html] [--notbefore <time>] <file>\n",
            program_name);
    exit(1);
}

/*
 * Return TRUE if arg is one of the option flags.  Used to reject a flag
 * appearing where a time string or a file name is expected.
 */
static bool is_option_flag(const char *arg)
{
    return strcmp(arg, "--html") == 0 || strcmp(arg, "--notbefore") == 0;
}

/*
 * Starting with the already-read character ch, consume characters from fp
 * up to a NUL byte or EOF, storing the "printable" ones (7-bit values from
 * space to 127) in buffer.  At most max_length characters are stored; the
 * rest are consumed but dropped.  buffer must have room for max_length + 1
 * bytes and is always NUL-terminated.
 *
 * Returns the character that ended the field ('\0' or EOF).
 */
static int collect_printable(FILE *fp, int ch, char *buffer, size_t max_length)
{
    size_t length = 0;

    while (ch != '\0' && ch != EOF) {
        if (length < max_length && ch >= ' ' && ch <= 127) {
            buffer[length++] = (char) ch;
        }
        ch = getc(fp);
    }
    buffer[length] = '\0';
    return ch;
}

/*
 * Write a URL for use inside a double-quoted href attribute:
 * '"' is rendered as "%22" so it cannot end the attribute early.
 */
static void print_url_attribute(const char *url)
{
    const char *p;

    for (p = url; *p != '\0'; p++) {
        switch (*p) {
        case '"':
            /* fputs, not printf: "%22" is not a valid printf format. */
            fputs("%22", stdout);
            break;
        default:
            putchar(*p);
            break;
        }
    }
}

/*
 * Write text as HTML character data: '<' and '>' are rendered as
 * "&lt;" and "&gt;" so they cannot open or close a tag.
 */
static void print_html_text(const char *text)
{
    const char *p;

    for (p = text; *p != '\0'; p++) {
        switch (*p) {
        case '<':
            fputs("&lt;", stdout);
            break;
        case '>':
            fputs("&gt;", stdout);
            break;
        default:
            putchar(*p);
            break;
        }
    }
}

/*
 * Write the ctime() rendering of when, without its trailing newline.
 * Writes nothing if ctime() cannot represent the time.
 */
static void print_time_string(time_t when)
{
    const char *p = ctime(&when);

    if (p == NULL) {
        return;
    }
    for (; *p != '\n' && *p != '\0'; p++) {
        putchar(*p);
    }
}

/*
 * Write one output line for an entry, in HTML or tab-separated form.
 */
static void print_entry(time_t timestamp, const char *url, const char *title,
                        bool output_as_html)
{
    if (output_as_html) {
        printf("%ld\t<p><a href=\"", (long) timestamp);
        print_url_attribute(url);
        fputs("\">", stdout);
        print_html_text(title);
        fputs("</a> (", stdout);
        print_html_text(url);
        fputs(") - ", stdout);
        print_time_string(timestamp);
        fputs("</p>\n", stdout);
    } else {
        printf("%ld\t", (long) timestamp);
        print_time_string(timestamp);
        printf("\t%s\t%s\n", url, title);
    }
}

/*
 * Scan fp heuristically for entries and print each one whose timestamp
 * lies in [not_before_time, current_time].
 *
 * An entry is looked for after every NUL byte: 4 bytes of little-endian
 * timestamp, 12 bytes that are skipped, a NUL-terminated title, then a
 * NUL-terminated URL that must start with an alphanumeric character.
 * Whenever a candidate is rejected, scanning resumes from the last byte
 * read, so that byte may itself be the NUL that introduces the next entry.
 */
static void parse_entries(FILE *fp, time_t not_before_time,
                          time_t current_time, bool output_as_html)
{
    static char title[MAX_DB_FILE_ENTRY_TITLE_LENGTH + 1];
    static char url[MAX_DB_FILE_ENTRY_URL_LENGTH + 1];
    unsigned long raw_timestamp;
    time_t timestamp;
    int ch, shift, count;

    ch = getc(fp);
    while (ch != EOF) {
        /* find a null character if there is one */
        while (ch != EOF && ch != '\0') {
            ch = getc(fp);
        }
        if (ch == EOF) {
            break;
        }

        /*
         * Assemble the next 4 bytes as an unsigned little-endian value.
         * Unsigned arithmetic avoids signed overflow when the top byte is
         * >= 128, and EOF is never folded into the value.
         */
        raw_timestamp = 0;
        for (shift = 0; shift < 32; shift += 8) {
            ch = getc(fp);
            if (ch == EOF) {
                break;
            }
            raw_timestamp |= (unsigned long) ch << shift;
        }
        if (ch == EOF) {
            break;
        }

        /* "sensible" means within the requested window, inclusive */
        timestamp = (time_t) raw_timestamp;
        if (timestamp < not_before_time || timestamp > current_time) {
            continue;
        }

        /*
         * Skip a further 12 bytes, which seem to be meant to be
         * (4 bytes each): another time-valued thing and two counters
         * of some sort.
         */
        for (count = 0; count < 12; count++) {
            ch = getc(fp);
        }

        /* what we hope is the title part */
        ch = collect_printable(fp, getc(fp), title,
                               MAX_DB_FILE_ENTRY_TITLE_LENGTH);
        if (title[0] == '\0') {
            strcpy(title, "Untitled");
        }

        /*
         * A "sensible" URL part is defined as one starting with an
         * alphanumeric character (not an ideal definition).
         */
        ch = getc(fp);
        if (!isalnum(ch)) {
            continue;
        }
        ch = collect_printable(fp, ch, url, MAX_DB_FILE_ENTRY_URL_LENGTH);

        print_entry(timestamp, url, title, output_as_html);
    }
}

/*
 * Entry point.  Accepts exactly these argument forms:
 *   <file>
 *   --html <file>
 *   --notbefore <time> <file>
 *   --html --notbefore <time> <file>
 * Returns 0 on success; exits with status 1 on a usage error, an
 * unreadable file, an unparseable "not before" time, or if the current
 * time cannot be determined.
 */
int main(int argc, char *argv[])
{
    const char *program_name;
    const char *not_before_time_as_string = NULL;
    const char *filename;
    bool output_as_html = FALSE;
    time_t not_before_time, current_time;
    FILE *filehandle;
    int argi = 1;

    program_name = (argc > 0 && argv[0] != NULL) ? argv[0] : "dbparse";

    /*
     * argument checking: the argument count alone determines which
     * flags must be present, and in which positions.
     */
    if (argc < 2 || argc > 5) {
        usage(program_name);
    }
    if (argc == 3 || argc == 5) {
        if (strcmp(argv[argi], "--html") != 0) {
            usage(program_name);
        }
        output_as_html = TRUE;
        argi++;
    }
    if (argc >= 4) {
        if (strcmp(argv[argi], "--notbefore") != 0) {
            usage(program_name);
        }
        not_before_time_as_string = argv[argi + 1];
        if (is_option_flag(not_before_time_as_string)) {
            usage(program_name);
        }
        argi += 2;
    }
    filename = argv[argi];
    if (is_option_flag(filename)) {
        usage(program_name);
    }

    /*
     * do we have a readable file?
     * (opened in binary mode: the data is full of NUL and control bytes)
     */
    filehandle = fopen(filename, "rb");
    if (filehandle == NULL) {
        fprintf(stderr, "%s: cannot open file \"%s\" for reading!\n",
                program_name, filename);
        exit(1);
    }

    /*
     * compute the "not before" time
     * (for rejecting entries whose timestamps are ostensibly
     * earlier than this)
     */
    if (not_before_time_as_string != NULL) {
        struct tm not_before_time_as_struct_tm;

        /*
         * strptime() only fills the fields named in the format, so start
         * from a known state and let mktime() work out whether DST applies.
         */
        memset(&not_before_time_as_struct_tm, 0,
               sizeof not_before_time_as_struct_tm);
        not_before_time_as_struct_tm.tm_isdst = -1;

        if (strptime(not_before_time_as_string, "%a %b %d %H:%M:%S %Y",
                     &not_before_time_as_struct_tm) == NULL) {
            not_before_time = (time_t) -1;
        } else {
            not_before_time = mktime(&not_before_time_as_struct_tm);
        }
        if (not_before_time == (time_t) -1) {
            fprintf(stderr,
                    "%s: cannot interpret \"not before\" time \"%s\"!\n",
                    program_name, not_before_time_as_string);
            exit(1);
        }
    } else {
        not_before_time = 0;
    }

    /*
     * get the current time
     * (for rejecting entries whose timestamps are ostensibly
     * in the future)
     */
    current_time = time(NULL);
    if (current_time == (time_t) -1) {
        fprintf(stderr, "%s: cannot determine current time!\n", program_name);
        exit(1);
    }

    /*
     * OK, now the parsing proper!
     */
    parse_entries(filehandle, not_before_time, current_time, output_as_html);

    fclose(filehandle);
    return 0;
}