import java.util.Stack;
import java.io.Reader;
import java.io.EOFException;
import java.io.IOException;

/**
 * This is the simplest possible table-driven lexer.  It recognizes two token
 * types -- unsigned integers and character-based words.  All other
 * characters are discarded.  This assumes that all input is 7-bit ASCII; any
 * other characters are errors and cause an exception.
 *
 * @author Terran Lane
 * @version 1.0
 */
public class MiniLexer implements Lexer {

  /**
   * Initializes this Lexer to take characters from the designated Reader
   * object.  Note that this Lexer does <em>not</em> close the Reader when
   * lexing is complete (i.e., when EOF is encountered) -- that job is left
   * up to the caller.  This lexer assumes that when EOF is encountered, the
   * token stream is done (i.e., the stream won't reset and no more tokens
   * will become available).
   *
   * @param r The Reader from which to take character data.
   * @throws NullPointerException if <code>r</code> is null.
   */
  public MiniLexer(Reader r) {
    if (r==null) {
      throw new NullPointerException("Reader arg to MiniLexer must be " +
                                     "non-null");
    }
    _in=r;
    _initTab();
    _pbBuf=new Stack();
    _curStr=new StringBuffer();
    _curState=ST_INIT;
    _tokAvail=true;
  }

  /**
   * Reports whether a call to {@link #next()} can produce a token.  That is
   * the case when a pushed-back token is waiting, or when end of input has
   * not yet been seen on the underlying Reader.
   *
   * @return true if <code>next()</code> will return a token rather than
   *         throw EOFException.
   */
  public boolean hasNext() {
    // Pushed-back tokens remain retrievable even after EOF was reached.
    return _tokAvail || !_pbBuf.empty();
  }

  /**
   * Returns the next token.  Pushed-back tokens are returned first (most
   * recently pushed first); otherwise characters are read from the Reader
   * and run through the transition table until a token is complete.
   *
   * The call that encounters end of input returns whatever text has been
   * accumulated so far, typed according to the current state; if nothing is
   * pending, that is an empty-string token of type BaseToken.T_UNKN.
   *
   * @return the next token.
   * @throws EOFException if end of input was already reached and no
   *         pushed-back token is available.
   * @throws IOException if the Reader fails, or if a character outside the
   *         7-bit ASCII range is read.
   */
  public Token next() throws IOException {
    if (!_pbBuf.empty()) {
      return (Token)_pbBuf.pop();
    }
    if (!_tokAvail) {
      throw new EOFException("No more tokens available");
    }
    Token t;
    while (true) {
      int c=_in.read();
      if (c<0) {
        // EOF -- hand back the currently accumulating token and shut down
        _tokAvail=false;
        t=new BaseToken(_curStr.toString(),
                        _tab[_curState][C_EOF].getTokType());
        _curState=-1;
        _curStr=null;
        return t;
      }
      if (c>=C_EOF) {
        // Only codes 0..127 have table columns; column C_EOF is reserved
        // for end of input and must not be reachable by a real character.
        throw new IOException("Non-ASCII character (code " + c +
                              ") in MiniLexer input");
      }
      _LexAct act=_tab[_curState][c];
      switch (act.getAct()) {
        case A_RET:
          // return the currently accumulated token, and shift current
          // character onto beginning of next token
          t=new BaseToken(_curStr.toString(),act.getTokType());
          _curStr.setLength(0);
          _curStr.append((char)c);
          // the shifted character already belongs to the next token, so
          // the state must follow it
          _curState=act.getNextState();
          return t;
        case A_DROP:
          // simply drop the current character and continue (don't return
          // the currently accumulating token)
          _curState=act.getNextState();
          break;
        case A_SHIFT:
          // shift the current character onto the buffer and continue
          _curStr.append((char)c);
          _curState=act.getNextState();
          break;
        case A_DRET:
          // drop the current character, but treat it as a token terminator;
          // return the currently accumulated token.
          t=new BaseToken(_curStr.toString(),act.getTokType());
          _curStr.setLength(0);
          _curState=act.getNextState();
          return t;
        default:
          // every table entry is built from the A_* constants, so reaching
          // this point means the table is corrupt
          throw new AssertionError("action=" + act.getAct() +
                                   ", _curState=" + _curState +
                                   ", c=" + c + "(" + (char)c + ")" +
                                   ", _curStr=" + _curStr);
      }
    }
  }

  /**
   * Pushes a token back onto this lexer so that it is returned by a later
   * call to {@link #next()}.  Tokens come back in last-in, first-out order.
   *
   * @param t the token to push back.
   * @return the number of tokens held in the push-back buffer after the
   *         push.
   */
  public int pushBack(Token t) {
    _pbBuf.push(t);
    return _pbBuf.size();
  }

  /* ******************** end of public interface ******************** */

  /**
   * Inner class representing a lexical action, including next state and
   * action to take (e.g., RETURN, SHIFT, or DROP), plus the token type to
   * assign if the action produces a token.
   */
  protected static class _LexAct {
    /**
     * @param nState state to move to after the action; must be >= 0.
     * @param act one of the A_* action codes; must be >= 0.
     * @param ttype token type reported if this action emits a token.
     * @throws IllegalArgumentException if nState or act is negative.
     */
    public _LexAct(int nState, int act, int ttype) {
      if (nState<0 || act<0) {
        throw new IllegalArgumentException("State/Action/TokType tuple for " +
                                           "_LexAct object must both be " +
                                           ">=0, not nState=" + nState +
                                           ",act=" + act +
                                           ",ttype=" + ttype);
      }
      _ns=nState;
      _a=act;
      _t=ttype;
    }
    /** @return the state to enter after this action. */
    public int getNextState() { return _ns; }
    /** @return the action code. */
    public int getAct() { return _a; }
    /** @return the token type associated with this action. */
    public int getTokType() { return _t; }
    private final int _ns;
    private final int _a;
    private final int _t;
  }

  /**
   * Builds the transition table, indexed as [state][character code].  There
   * is one column per 7-bit ASCII code plus a final column, C_EOF, that is
   * consulted when end of input is reached.
   */
  private void _initTab() {
    _tab=new _LexAct[ST_NUM_STATES][C_EOF+1];
    for (int i=0;i<C_EOF;++i) {
      if (Character.isDigit((char)i)) {
        _tab[ST_INIT][i]=new _LexAct(ST_ININT,A_SHIFT,BaseToken.T_UNKN);
        _tab[ST_ININT][i]=new _LexAct(ST_ININT,A_SHIFT,BaseToken.T_INT);
        // a digit directly after a word ends the word and starts an int
        _tab[ST_INWORD][i]=new _LexAct(ST_ININT,A_RET,BaseToken.T_WORD);
      }
      else if (Character.isLetter((char)i)) {
        _tab[ST_INIT][i]=new _LexAct(ST_INWORD,A_SHIFT,BaseToken.T_UNKN);
        // a letter directly after an int ends the int and starts a word
        _tab[ST_ININT][i]=new _LexAct(ST_INWORD,A_RET,BaseToken.T_INT);
        _tab[ST_INWORD][i]=new _LexAct(ST_INWORD,A_SHIFT,BaseToken.T_WORD);
      }
      else {
        // any other character is discarded, terminating a pending token
        _tab[ST_INIT][i]=new _LexAct(ST_INIT,A_DROP,BaseToken.T_UNKN);
        _tab[ST_ININT][i]=new _LexAct(ST_INIT,A_DRET,BaseToken.T_INT);
        _tab[ST_INWORD][i]=new _LexAct(ST_INIT,A_DRET,BaseToken.T_WORD);
      }
    }
    _tab[ST_INIT][C_EOF]=new _LexAct(ST_INIT,A_DROP,BaseToken.T_UNKN);
    _tab[ST_ININT][C_EOF]=new _LexAct(ST_INIT,A_DRET,BaseToken.T_INT);
    _tab[ST_INWORD][C_EOF]=new _LexAct(ST_INIT,A_DRET,BaseToken.T_WORD);
  }

  // Action codes stored in the transition table.
  protected static final int A_RET=1;     // emit token; keep char for next
  protected static final int A_DROP=2;    // discard char
  protected static final int A_SHIFT=3;   // append char to current token
  protected static final int A_DRET=4;    // discard char and emit token

  protected static final int ST_INIT=0;         // initial state
  protected static final int ST_ININT=1;        // parsing an int
  protected static final int ST_INWORD=2;       // parsing a word
  protected static final int ST_NUM_STATES=3;

  private static final int C_EOF=128;           // special char code for EOF

  private final Stack _pbBuf;           // pushed-back tokens (LIFO)
  private StringBuffer _curStr;         // text of token being accumulated
  private int _curState;                // current ST_* state; -1 after EOF
  private final Reader _in;             // character source (never closed here)
  private _LexAct[][] _tab;             // [state][char code] transitions
  private boolean _tokAvail;            // false once EOF has been read
}
