/******************************************************************************
 * Author:      Shawn Stoffer
 *
 * Description: If a line extends more than the specified number of  
 *              characters then make it into multiple lines.
 *
 * Algorithm:   Get each character from stdin.  Count how many 
 *              characters there are, if there are more than the specified
 *              number of characters counted before a newline is reached then 
 *              insert a newline and reset the count.  Every time a
 *              newline is encountered, whether it be inserted by this
 *              program, or in the original input reset the count. 
 *              Indicate that a line continuation occurred by showing
 *              three stars on the continued line.
 *
 * Usage:       breakline [without] [-stw] [-help] [-c number]
 *
 * Arguments:     The arguments to this program are the 'without' option, the 
 *              '-s', '-w', '-t' or '-c' options.  The without option is the old 
 *              style argument and is present only to maintain previous 
 *              functionality, to be backward compatible, it performs the same
 *              function as the -s option.
 *                The -s option will tell breakline NOT to prepend three star
 *              characters('*') in front of the continued line.
 *                The -w option will tell breakline to not break words across 
 *              lines, a word is defined here to be a string of characters
 *              from one whitespace character to another whitespace character.
 *              Note:  The -s and -w options can be combined on the command
 *               line into -sw.  
 *                The -c option is used to specify the number of characters to 
 *              put on a line.  This is defaulted to be 77, three characters
 *              before the default of 80 characters in a normal terminal.
 *                The -t option is used to specify that a tab should precede
 *              a continued line.
 *             
 *              All input is accepted only from stdin, and all
 *              output is put on stdout.
 *****************************************************************************/
/* This is necessary for printf, putc, getc, stdin, stdout, and feof */
#include <stdio.h>
/* General C include file...  atoi is contained here */
#include <stdlib.h>
/* This is necessary for strchr and strncmp */
#include <string.h>
/* This is necessary for the isspace macro */
#include <ctype.h>
/* This program uses the garray data structure, written and maintained by 
 * Shawn Stoffer.  This data structure is a growable char array, and while it 
 * can be accessed using the subscript ([]) operator, it does not update the
 * internal data structure size if that operator is used, so it cannot add
 * characters beyond the current maximum allowed characters in the array..
 * a small note is that this is only really important if you want the ability
 * to make sure words are kept together.  What constitutes a word here is 
 * a whitespace delimited set of characters.
 */
#include "garray.h"

/* Bounds accepted for the argument of the '-c' option.  The lower bound is
 * the historical minimum; the upper bound keeps the column counter well
 * inside the range of an int even after the prefix widths are added.
 */
enum { MIN_LINE_WIDTH = 10, MAX_LINE_WIDTH = 32767 };

/* Print the usage screen on stdout. */
static void print_usage(void) {
  printf(
   "Usage: breakline [without] [-help] [-stw] [-c num]\n"
   "\twithout   : old style argument, does the same as -s.\n\n"
   "\t-s        : continue each broken line without three stars ('*')\n"
   "\t            prepended to each broken line\n\n"
   "\t-w        : break each line at either the specified amount of \n"
   "\t            chars, or the beginning of the word on that line \n"
   "\t            if it extends beyond the specified number of chars.\n"
   "\t-t        : continue each broken line with a tab prepended\n\n"
   "\n\t-c number : continue each line after number of characters on \n"
   "\t          : that line has been reached.\n\n"
   "\t-help     : this screen.\n\n"
  );
}

/*
 * Entry point: parse the command line, then copy stdin to stdout, inserting
 * a newline whenever the running column count exceeds the configured width.
 *
 * Returns 0 on normal completion.  Exits with status 0 after printing the
 * usage screen, and with status 1 when the argument of '-c' is not a number
 * in the accepted range.
 */
int main(int argc, char ** argv) {

  /* General purpose vars. */
  char ch;
  char *tmp;
  char *end;
  long width;
  int i = 0, j, artificialNewline = 0;

  /* Program Options */
  int breakwords = 1, STARS = 1, noTabs = 1, help = 0;
  /* Kept as an int: a char cannot hold widths above 127 and silently
   * truncated (or made negative) larger values given to '-c'.
   */
  int chars = 77;

  /* Needed for !breakwords. */
  garray cur_word;

  /* Check for optional arguments. */
  if (argc > 1) {

    /* Maintain ancient functionality...  When the first argument is the old
     * style 'without', no other argument is examined.
     */
    if (!strncmp(argv[1], "without", 7)) {
      STARS = 0;
    } else {  /* the New arguments. */

      /* run through all arguments to the program. */
      for (i = 1; i < argc; i++) {
        /* New options begin with the '-' character. */
        if (*(argv[i]) == '-') {

          /*
           * Every option letter is searched for in the whole option string
           * with strchr, so that -swtc performs no differently than -tcsw.
           */
          tmp = argv[i] + 1;

          /* Just a '-' with nothing after it is not a valid option; treat
           * it like any other malformed argument and show the usage screen.
           */
          if (*tmp == '\0')
            help = 1;

          /* "-w" == !breakwords */
          if ( strchr(tmp, 'w') )
            breakwords = 0;

          /* "-s" == !STARS */
          if ( strchr(tmp, 's') )
            STARS = 0;

          /* "-t" == prepend a tab to continued lines */
          if ( strchr(tmp, 't') )
            noTabs = 0;

          if ( strchr(tmp, 'h') )
            help = 1;

          /*
           * This must be last, because it takes an argument and
           * blows away tmp.  A trailing '-c' with no following argument
           * is ignored and the current width is kept.
           */
          if ( strchr(tmp, 'c') ) {
            if (i+1 < argc) {
              tmp = argv[++i];
              /* strtol reports where parsing stopped, so empty input and
               * trailing garbage can be rejected; an out-of-range value
               * saturates to LONG_MIN/LONG_MAX and fails the range check.
               */
              width = strtol(tmp, &end, 10);
              if (end == tmp || *end != '\0' ||
                  width < MIN_LINE_WIDTH || width > MAX_LINE_WIDTH) {
                /* This is considered a catastrophic error condition. */
                printf("Argument supplied to '-c' option is not a valid number.\n");
                exit(1);
              }
              chars = (int) width;
            }
          }
        } else {
          /* Anything that is not an option asks for the usage screen. */
          help = 1;
        }
        if (help) {
          print_usage();
          exit(0);
        }
      }
    }
  }

  /*
   * Process the entire input one character at a time.  i is the running
   * column count for the current output line.  The loop also stops on a
   * read error, which getc reports without ever setting the EOF indicator.
   */
  ch = getc(stdin);
  for (i = 1; !feof(stdin) && !ferror(stdin); i++) {
    /*
     * If we want to break words apart ( the default behavior ) then we only
     * need to look at each character, otherwise, we have to look at each
     * word: characters are collected in cur_word and written out when the
     * next whitespace character arrives.
     */
    if (breakwords) {
      putc(ch, stdout);
    } else {
      /* The cast is required: passing a negative char to isspace is
       * undefined behavior.
       */
      if (isspace((unsigned char) ch)) {
        for (j = 0; cur_word[j]; j++)
          putc(cur_word[j], stdout);
        /*
         * If we inserted a newline ourselves and the very next thing on the
         * line is a '\n' from the input (i <= 2), that '\n' is dropped so
         * that running breakline repeatedly over its own output does not
         * keep adding blank lines.  In every other case the whitespace
         * character is copied through.
         */
        if (!artificialNewline || ch != '\n' || i > 2)
          putc(ch, stdout);
        else if (ch == '\n')
          artificialNewline = 0;
        cur_word.clear();
      } else {
        cur_word.Put(ch);
      }
    }

    /*
     * On a newline from the input, or once the column count passes the
     * limit, restart the count at the length of the pending word (empty when
     * words are being broken).  When the line is broken by us rather than by
     * the input, emit the newline followed by the optional "*** " marker
     * and/or tab, and account for their width in the new count.
     */
    if (i > chars || ch == '\n') {
      i = cur_word.size();
      if (ch != '\n') {
        putc('\n', stdout);
        if (STARS) {
          putc('*', stdout);
          putc('*', stdout);
          putc('*', stdout);
          putc(' ', stdout);
          i += 4;
        }
        artificialNewline = 1;
        if (!noTabs) {
          putc('\t', stdout);
          i += 8;
        }
      }
    }

    /* get the next character from the file. */
    ch = getc(stdin);
  }

  /*
   * If the input did not end on a whitespace character, the last word is
   * still pending in cur_word; write it out.  cur_word is never filled when
   * words are being broken, and is cleared after every whitespace character.
   */
  for (j = 0; cur_word[j]; j++)
    putc(cur_word[j], stdout);

  return 0;
}
