/* Generally required for stdin/stdout, printf, sprintf... */
#include <stdio.h>
#include <string.h>

/*
 * Comment-stripping filter: copies stdin to stdout with comments removed.
 *
 * The optional first command-line argument selects the comment syntax:
 *   "C" or "C++" - both slash-star block comments and // line comments
 *   "asm"        - from ';' to the end of the line
 *   "script"     - from '#' to the end of the line (default when no
 *                  argument is given)
 *
 * Returns 0 after the whole input has been processed.  Returns 1 after
 * printing the usage string to stdout when the argument is not recognized,
 * or after printing an error if the selected mode is not one of the known
 * enum values.
 *
 * Known limitations (kept as-is):
 *   - The newline that terminates a line comment is consumed together with
 *     the comment, so the following line is joined to the current one.
 *   - String and character literals are not recognized; comment markers
 *     inside them are treated as the start of a comment.
 */
int main(int argc, char ** argv) {
  /*
   * Comment syntax selector.  `comments` holds the mode chosen on the
   * command line.  `whichComment` records which kind of comment is currently
   * open in C/C++ mode and only ever holds COMMENT_C (block comment) or
   * COMMENT_CPP (line comment); it is only read while in_comment is
   * BLOCK_COMMENT, and is initialized here purely to avoid an indeterminate
   * value.
   */
  enum commentLangs {
    COMMENT_C, COMMENT_SCRIPT, COMMENT_ASM, COMMENT_CPP
  } whichComment = COMMENT_C, comments = COMMENT_SCRIPT;
  /* Parser state: are we inside a comment or inside ordinary text? */
  enum blockTypes {
    BLOCK_COMMENT, BLOCK_CODE
  } in_comment = BLOCK_CODE;

  /*
   * Usage text, written as a printf format string whose single %s is the
   * program name.
   */
  const char * USAGE = "%s [ 'asm' | 'C' | 'C++' | 'script' ]\n";

  /*
   * Characters are kept as int (the type getchar() returns) so that EOF
   * stays distinguishable from every valid byte value.
   */
  int cha;   /* character currently being processed */
  int next;  /* one-character look-ahead for two-character markers */

  /* Determine which comment syntax was requested, if any. */
  if (argc > 1) {
    if (strcmp(argv[1], "C") == 0) {
      comments = COMMENT_C;
    } else if (strcmp(argv[1], "C++") == 0) {
      comments = COMMENT_CPP;
    } else if (strcmp(argv[1], "script") == 0) {
      comments = COMMENT_SCRIPT;
    } else if (strcmp(argv[1], "asm") == 0) {
      comments = COMMENT_ASM;
    } else {
      /* Unknown option. */
      printf(USAGE, argv[0]);
      /* Returning from main is equivalent to calling exit(). */
      return 1;
    }
  }

  /*
   * Process the input one character at a time as a small state machine.
   */
  while ((cha = getchar()) != EOF) {
    switch (comments) {
      /*
       * C and C++ modes are handled identically: both the block form
       * (slash-star ... star-slash) and the line form (slash-slash to end
       * of line) are stripped.
       */
      case COMMENT_C:
      case COMMENT_CPP:
        if (in_comment == BLOCK_CODE) {
          if (cha == '/') {
            /*
             * A '/' opens a comment only when followed by '*' or '/', so
             * peek at the next character.
             */
            next = getchar();
            if (next == '*') {
              in_comment = BLOCK_COMMENT;
              whichComment = COMMENT_C;
              break;
            }
            if (next == '/') {
              in_comment = BLOCK_COMMENT;
              whichComment = COMMENT_CPP;
              break;
            }
            /*
             * Not a comment: push the look-ahead back so the main loop
             * processes it normally, then fall through to emit the '/'
             * itself.  Nothing is pushed back at end of input.
             */
            if (next != EOF) {
              ungetc(next, stdin);
            }
          }
          /* Ordinary text: emit the current character. */
          putchar(cha);
        } else if (in_comment == BLOCK_COMMENT) {
          if (whichComment == COMMENT_CPP && cha == '\n') {
            /* Line comments run only until the end of the line. */
            in_comment = BLOCK_CODE;
          } else if (whichComment == COMMENT_C && cha == '*') {
            /* A block comment ends at '*' immediately followed by '/'. */
            next = getchar();
            if (next == '/') {
              in_comment = BLOCK_CODE;
            } else if (next != EOF) {
              /*
               * Not the terminator; push it back so that a run of several
               * '*' followed by '/' is still recognized as the end.
               */
              ungetc(next, stdin);
            }
          }
        }
        break;
      case COMMENT_ASM:
        /*
         * Assembly comments start at ';' and run to the end of the line, so
         * a single character is enough to decide.
         */
        if (in_comment == BLOCK_COMMENT && cha == '\n') {
          in_comment = BLOCK_CODE;
        } else if (in_comment == BLOCK_CODE && cha == ';') {
          in_comment = BLOCK_COMMENT;
        } else if (in_comment == BLOCK_CODE) {
          /* Not in a comment: emit the character. */
          putchar(cha);
        }
        break;
      case COMMENT_SCRIPT:
        /*
         * Script comments start at '#' and run to the end of the line.
         */
        if (in_comment == BLOCK_COMMENT && cha == '\n') {
          in_comment = BLOCK_CODE;
        } else if (in_comment == BLOCK_CODE && cha == '#') {
          in_comment = BLOCK_COMMENT;
        } else if (in_comment == BLOCK_CODE) {
          /* Not in a comment: emit the character. */
          putchar(cha);
        }
        break;
      default:
        /*
         * Reaching this point means `comments` holds a value outside the
         * enum, i.e. internal state has been corrupted.  That is very
         * unlikely, but it is caught here rather than ignored.
         */
        printf("Internal Error!!!!   Restart!\n");
        return 1;
    }
  }

  return 0;
}
