// minimal tool to compare two text files

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of elements of a true array (must not be used on a pointer).
   The argument is parenthesized so that any array-valued expression is safe. */
#define ARRAYSIZE(arr) (sizeof (arr) / sizeof (arr)[0])

/* Signature of a wildcard handler: it is called with the right-hand file and
   returns non-zero while the wildcard is still being matched, 0 once it ended. */
typedef int wildcardfunc(FILE * f2);

/* One entry of the wildcard table: the name as written between the
   WILDCARDCHAR_INTERNAL markers, and the handler implementing it. */
struct wildcardtype {
    const char   * name;   /* points to a string literal, hence const */
    wildcardfunc * func;
};


/* forward declarations of the wildcard handlers; the parameter is named as in
   their definitions */
static int wildcard_path(FILE * f2);
static int wildcard_integer(FILE * f2);

/* table of all known wildcards, searched by findwildcardfunc() */
struct wildcardtype wildcards[] = {
    { .name = "PATH",    .func = wildcard_path    },
    { .name = "INTEGER", .func = wildcard_integer }
};

/* handler of the wildcard that is currently being matched
   (assigned in processwildcardchar()) */
static wildcardfunc * currentwildcardfunc = NULL;

/* settings derived from the command line by handleparameter() */
static int binary = 0;            /* --binary: getnext() does not convert line endings */
static int empty = 0;             /* --empty: only test whether the left file is empty */
static int skiplines_left = 0;    /* number of lines to skip in the left file */
static int skiplines_right = 0;   /* number of lines to skip in the right file */
static int use_wildcards = 0;     /* --wildcards: compare with wildcard support */
static char * filename_left = NULL;
static char * filename_right = NULL;

/* LOOKAHEADBUFFERSIZE must be a power of 2, because the read and write
   indices wrap around by masking them with (LOOKAHEADBUFFERSIZE - 1).
*/
#define LOOKAHEADBUFFERSIZE 0x80

/* Compile-time enforcement of the requirement above: the array size becomes
   negative (a compile error) if LOOKAHEADBUFFERSIZE is not a power of 2. */
typedef char lookaheadbuffersize_must_be_a_power_of_2[
    (LOOKAHEADBUFFERSIZE > 0
     && (LOOKAHEADBUFFERSIZE & (LOOKAHEADBUFFERSIZE - 1)) == 0) ? 1 : -1];

/* characters a wildcard in the left file is built from */
#define WILDCARDCHAR_OPEN     '<'
#define WILDCARDCHAR_INTERNAL '#'
#define WILDCARDCHAR_CLOSE    '>'

/* number of consecutive WILDCARDCHAR_OPEN characters that introduce a wildcard */
#define WILDCARDCHAR_OPEN_CLOSE_COUNT 3
/* size of the wildcard name buffer (without the terminating 0) */
#define WILDCARDNAME_MAXLENGTH 20

/* ring buffer for characters of the left file that were read ahead while
   looking for a wildcard, but turned out to be ordinary text */
static char lookaheadbuffer[LOOKAHEADBUFFERSIZE] = { 0 };
static int  lookaheadindexread = 0;
static int  lookaheadindexwrite = 0;

/* the character that followed the wildcard in the left file; the wildcard
   handlers stop when they read this character from the right file */
static int  wildcardendchar;

/* Look up the handler for the wildcard called wildcardname.

   Returns the handler from the wildcards[] table whose name compares equal,
   or NULL if there is none. Should a name occur more than once, the entry
   closest to the end of the table is used.
*/
wildcardfunc * findwildcardfunc(char * wildcardname)
{
    unsigned int remaining = ARRAYSIZE(wildcards);

    /* walk the table back to front, so the first hit is the last entry */
    while (remaining-- > 0) {
        if (strcmp(wildcards[remaining].name, wildcardname) == 0) {
            return wildcards[remaining].func;
        }
    }

    return NULL;
}

/* Parse the numeric value of an option of the form "--name=NUMBER".

   offset     index into parameter at which the '=' is expected
   parameter  the complete command line argument

   Returns the non-negative number following the '=', or -1 if the '=' is
   missing, no digits follow it, anything follows the number, the number is
   negative, or it does not fit into an int.
*/
static int handleargparameter(int offset, char * parameter)
{
    const char * digits;
    char * endptr = NULL;
    long number;

    if (parameter[offset] != '=') {
        return -1;
    }
    digits = parameter + offset + 1;

    errno = 0;
    number = strtol(digits, &endptr, 10);

    /* endptr == digits: nothing was converted (e.g. "--skip=") */
    if (endptr == digits || *endptr != '\0') {
        return -1;
    }

    /* reject overflow and values that would not survive the conversion to int */
    if (errno == ERANGE || number < 0 || number > INT_MAX) {
        return -1;
    }

    return (int) number;
}

static int handleparameter(int argc, char *argv[])
{
    static const char opt_binary[]    = "--binary";
    static const char opt_empty[]     = "--empty";
    static const char opt_skipleft[]  = "--skipleft";
    static const char opt_skipright[] = "--skipright";
    static const char opt_skip[]      = "--skip";
    static const char opt_wildcards[] = "--wildcards";

    static const char len_skipleft    = sizeof opt_skipleft - 1;
    static const char len_skipright   = sizeof opt_skipright - 1;
    static const char len_skip        = sizeof opt_skip - 1;

    int argindex = 1;

    if (argc < 2) {
        return -1;
    }

    while (argindex < argc && argv[argindex] && argv[argindex][0] == '-') {
        if (strcmp(argv[argindex], opt_binary) == 0) {
            if (empty || skiplines_left || skiplines_right) {
                fprintf(stderr, "--empty cannot go with other options.\n");
                exit(1);
            }
            binary = 1;
        }
        else if (strcmp(argv[argindex], opt_empty) == 0) {
            if (binary || skiplines_left || skiplines_right || use_wildcards) {
                fprintf(stderr, "--binary cannot go with other options.\n");
                exit(1);
            }
            empty = 1;
        }
        else if (strncmp(argv[argindex], opt_skipleft, len_skipleft) == 0) {
            if (binary || empty) {
                fprintf(stderr, "%s cannot go with other options.\n", opt_skipleft);
                exit(1);
            }
            skiplines_left = handleargparameter(len_skipleft, argv[argindex]);
            if (skiplines_left < 0) {
                fprintf(stderr, "%s: you must specify the number of lines\n", opt_skipleft);
            }
        }
        else if (strncmp(argv[argindex], opt_skipright, len_skipright) == 0) {
            if (binary || empty) {
                fprintf(stderr, "%s cannot go with other options.\n", opt_skipright);
                exit(1);
            }
            skiplines_right = handleargparameter(len_skipright, argv[argindex]);
            if (skiplines_right < 0) {
                fprintf(stderr, "%s: you must specify the number of lines\n", opt_skipright);
            }
        }
        else if (strncmp(argv[argindex], opt_skip, len_skip) == 0) {
            if (binary || empty) {
                fprintf(stderr, "%s cannot go with other options.\n", opt_skipright);
                exit(1);
            }
            skiplines_left = skiplines_right = handleargparameter(len_skip, argv[argindex]);
            if (skiplines_left < 0) {
                fprintf(stderr, "%s: you must specify the number of lines\n", opt_skip);
            }
        }
        else if (strcmp(argv[argindex], opt_wildcards) == 0) {
            if (binary) {
                fprintf(stderr, "--wildcards cannot go with --binary.\n");
                exit(1);
            }
            use_wildcards = 1;
        }
        ++argindex;
    }

    if (argc + empty - argindex != 2) {
        fprintf(stderr, "filenames are missing!\n");
        return -1;
    }

    filename_left  = argv[argindex++];
    filename_right = argv[argindex++];

    return 0;
}

/* Read the next character from f. Unless binary mode is active, the commonly
   used line endings are all reported as the same value (0x0a, as used on
   *nix systems):

   0x0a (LF)                Linux, macOS
   0x0d, 0x0a (CR, LF)      Windows, MSDOS, OS/2
   0x0d (CR)                classic MacOS

   Returns the character, or whatever fgetc() returned if it is not a CR.
*/

static int getnext(FILE *f)
{
    const int current = fgetc(f);
    int following;

    if (binary || current != 0x0d) {
        return current;
    }

    /* a CR was read: swallow a directly following LF, if there is one */
    if (feof(f)) {
        return 0x0a;
    }

    following = fgetc(f);
    if (following != 0x0a) {
        ungetc(following, f);
    }
    clearerr(f); /* clears EOF when we did not push back */

    return 0x0a;
}

/* Consume skipcount complete lines from f.

   Stops early when getnext() reports EOF. Testing the returned value rather
   than feof() also ends the loop on a read error, where feof() stays false
   and the loop would otherwise never terminate.
*/
static void skiplines(FILE *f, int skipcount)
{
    while (skipcount > 0) {
        const int c = getnext(f);

        if (c == EOF) {
            return;
        }

        if (c == 0x0a) {
            --skipcount;
        }
    }
}

/* Compare f1 and f2 character by character, as delivered by getnext().

   Returns EXIT_SUCCESS if both files reach their end without a difference,
   EXIT_FAILURE on the first difference or if only one file has ended.
*/
static int comparefiles(FILE *f1, FILE *f2)
{
    for (;;) {
        const int leftended  = feof(f1) != 0;
        const int rightended = feof(f2) != 0;

        if (leftended || rightended) {
            return (leftended && rightended) ? EXIT_SUCCESS : EXIT_FAILURE;
        }

        if (getnext(f1) != getnext(f2)) {
            return EXIT_FAILURE;
        }
    }
}

/* Returns non-zero if the lookahead buffer holds no characters, which is the
   case when the write index and the read index coincide. */
static int lookaheadbufferisempty(void)
{
    return lookaheadindexwrite == lookaheadindexread;
}

static char lookaheadbufferread(void)
{
    char ch;

    if (lookaheadbufferisempty()) {
        fprintf(stderr, "### want to take from lookahead buffer, but it is empty --> aborting!\n");
        exit(EXIT_FAILURE);
    }

    ch = lookaheadbuffer[lookaheadindexread];

    /* advance read pointer, with wrap-around */
    lookaheadindexread = (lookaheadindexread + 1) & (sizeof lookaheadbuffer - 1);

    return ch;
}

static void lookaheadbufferwrite(char ch)
{
    lookaheadbuffer[lookaheadindexwrite] = ch;

    /* advance write pointer, with wrap-around */
    lookaheadindexwrite = (lookaheadindexwrite + 1) & (sizeof lookaheadbuffer - 1);

    if (lookaheadbufferisempty()) {
        fprintf(stderr, "### lookahead buffer ovrrun, aborting!\n");
        exit(EXIT_FAILURE);
    }
}

/* Check whether a wildcard starts at the current position of f1.

   The caller has already consumed one WILDCARDCHAR_OPEN from f1. A wildcard
   consists of WILDCARDCHAR_OPEN_CLOSE_COUNT opening chars, a
   WILDCARDCHAR_INTERNAL, the wildcard name, another WILDCARDCHAR_INTERNAL and
   a run of WILDCARDCHAR_CLOSE chars. The length of that closing run is not
   validated.

   Returns 1 if a complete, known wildcard was read. In this case
   currentwildcardfunc is set to its handler and wildcardendchar to the
   character following the wildcard in f1 (EOF if there is none).

   Returns 0 if the text is not a wildcard, or a wildcard that is not
   terminated on its line. All characters consumed so far are then put into
   the lookahead buffer, so the caller compares them as ordinary text.

   A wildcard name that is too long or unknown terminates the program.
*/
static int processwildcardchar(FILE *f1)
{
    static char wildcardname[WILDCARDNAME_MAXLENGTH + 1];
    unsigned int wildcardnamenextindex = 0;
    unsigned int n;
    int countwildcardchar = 1;
    int i;
    int ch;
    int c; /* int rather than char: keeps EOF and bytes >= 0x80 distinct */

    while ((ch = getnext(f1)) == WILDCARDCHAR_OPEN) {
        ++countwildcardchar;
        if (feof(f1)) {
            break;
        }
    }

    if (countwildcardchar != WILDCARDCHAR_OPEN_CLOSE_COUNT || ch != WILDCARDCHAR_INTERNAL) {
        /* no wildcard; restore the chars */

        if (ch == WILDCARDCHAR_OPEN) {
            /* the loop was left by the break, ch is already counted */
            ch = -1;
        }

        for (i = 0; i < countwildcardchar; ++i) {
            lookaheadbufferwrite(WILDCARDCHAR_OPEN);
        }

        if (ch >= 0) {
            lookaheadbufferwrite((char) ch);
        }

        return 0;
    }

    /* we found a wildcard init sequence; now check which wildcard it is */

    c = getnext(f1);
    while (c != WILDCARDCHAR_INTERNAL && c != 0x0a && c != EOF) {
        wildcardname[wildcardnamenextindex] = (char) c;
        if (++wildcardnamenextindex >= WILDCARDNAME_MAXLENGTH) {
            wildcardname[WILDCARDNAME_MAXLENGTH] = 0;
            fprintf(stderr, "wildcard '%s' is too long!\n", wildcardname);
            exit(EXIT_FAILURE);
        }
        c = getnext(f1);
    }

    wildcardname[wildcardnamenextindex] = 0;

    if (c != WILDCARDCHAR_INTERNAL) {
        /* The line or the file ended inside the name. There is no handler
           for this, so it must not be reported as a wildcard; hand back
           everything that was consumed as ordinary text instead. */
        fprintf(stderr, "Thought I found wildcard '%s', but it does not end.\n", wildcardname);

        for (i = 0; i < WILDCARDCHAR_OPEN_CLOSE_COUNT; ++i) {
            lookaheadbufferwrite(WILDCARDCHAR_OPEN);
        }
        lookaheadbufferwrite(WILDCARDCHAR_INTERNAL);
        for (n = 0; n < wildcardnamenextindex; ++n) {
            lookaheadbufferwrite(wildcardname[n]);
        }
        if (c != EOF) {
            lookaheadbufferwrite((char) c);
        }

        return 0;
    }

    fprintf(stderr, "Found wildcard '%s'\n", wildcardname);

    /* skip the closing chars */
    do {
        c = getnext(f1);
    } while (c == WILDCARDCHAR_CLOSE);

    wildcardendchar = c; /* remember next char */

    currentwildcardfunc = findwildcardfunc(wildcardname);
    if (currentwildcardfunc == NULL) {
        fprintf(stderr, "Wildcard '%s' is unknown!\n", wildcardname);
        exit(EXIT_FAILURE);
    }

    return 1;
}

/* Handler for the PATH wildcard: consumes one character of f2 per call.

   Returns 1 while the wildcard continues, and 0 once wildcardendchar was
   read. A character outside of the range ' ' .. 126 that is not
   wildcardendchar terminates the program with EXIT_FAILURE.
*/
static int wildcard_path(FILE * f2)
{
    /* counts down over the first calls of a wildcard; a ':' is only
       tolerated while this is still >= 0 after the decrement */
    static int allowedcolonin = 2;

    const int ch = getnext(f2);

    if (allowedcolonin >= 0) {
        --allowedcolonin;
    }

    if (ch != wildcardendchar && ch >= ' ' && ch <= 126) {
        /* still a path char */
        return 1;
    }

    /* Not a path char anymore. First of all, ignore a colon at position 2
     * if it is the wildcardendchar. This is needed for windows path
     * specifications, which can begin with a drive specifier and a colon.
     */
    if (ch == ':' && allowedcolonin >= 0) {
        fprintf(stderr, "Ignoring ':' at drive specifier, do not end the path here!\n");
        return 1;
    }

    /* the wildcard ends here; re-arm the colon counter for the next one */
    allowedcolonin = 2;

    if (ch != wildcardendchar) {
        exit(EXIT_FAILURE);
    }

    return 0;
}

/* Handler for the INTEGER wildcard: consumes one character of f2 per call.

   Returns 1 for a digit that is not wildcardendchar, and 0 once
   wildcardendchar was read. Any other character terminates the program with
   EXIT_FAILURE.
*/
static int wildcard_integer(FILE * f2)
{
    const int ch = getnext(f2);

    if (ch != wildcardendchar && ch >= '0' && ch <= '9') {
        /* still a digit */
        return 1;
    }

    if (ch != wildcardendchar) {
        exit(EXIT_FAILURE);
    }

    return 0;
}

static int comparefileswithwildcards(FILE *f1, FILE *f2)
{
    static int iswildcard = 0;

    for(;;) {
        int c1;

        if (lookaheadbufferisempty() && !iswildcard && feof(f1) && feof(f2)) {
            return EXIT_SUCCESS;
        } else if (((lookaheadbufferisempty() && !iswildcard && feof(f1))) || feof(f2)) {
            return EXIT_FAILURE;
        }

        if (iswildcard) {
            /* f1 has a wildcard; process that */
            iswildcard = currentwildcardfunc(f2);
        }
        else {
            /* f1 does not have a wildcard; process the next char (unless it starts a wildcard) */
            if (lookaheadbufferisempty()) {
                c1 = getnext(f1);
                if (c1 == WILDCARDCHAR_OPEN) {
                    iswildcard = processwildcardchar(f1);
                    continue;
                }
            }
            else {
                c1 = lookaheadbufferread();
            }
            if (c1 != getnext(f2)) {
                return EXIT_FAILURE;
            }
        }
    }
}

/* Entry point.

   Evaluates the command line, then either tests the left file for being
   empty (--empty) or compares the left with the right file.

   Returns EXIT_SUCCESS if the file is empty or the files match, respectively,
   and EXIT_FAILURE otherwise, including invalid arguments and files that
   cannot be opened.
*/
int main(int argc, char *argv[])
{
    FILE *f1;
    FILE *f2;
    int result;

    if (handleparameter(argc, argv) < 0) {
        return EXIT_FAILURE;
    }

    f1 = fopen(filename_left, "rb");
    if (f1 == NULL) {
        perror(filename_left);
        return EXIT_FAILURE;
    }

    if (empty) {
        /* The file is empty if its end is at offset 0. A failing fseek()
           must count as failure; otherwise ftell() would report the
           unchanged start position 0. */
        if (fseek(f1, 0, SEEK_END) == 0 && ftell(f1) == 0) {
            result = EXIT_SUCCESS;
        }
        else {
            result = EXIT_FAILURE;
        }
        fclose(f1);
        return result;
    }

    if (skiplines_left > 0) {
        skiplines(f1, skiplines_left);
    }

    f2 = fopen(filename_right, "rb");
    if (f2 == NULL) {
        perror(filename_right);
        fclose(f1);
        return EXIT_FAILURE;
    }

    if (skiplines_right > 0) {
        skiplines(f2, skiplines_right);
    }

    if (use_wildcards) {
        result = comparefileswithwildcards(f1, f2);
    }
    else {
        result = comparefiles(f1, f2);
    }

    fclose(f2);
    fclose(f1);

    return result;
}