# simple_lexer.py

# --------------------------------------------------------------------------

class Lexer(object):
    """Hand-written lexer producing identifiers, numbers, literals and separators.

    The lexer keeps one character of look-ahead in ``self.ch`` (the integer
    ``0`` marks the end of the source) and one token of look-ahead in
    ``self.token`` / ``self.tokenText``.  ``//`` line comments and
    ``/* ... */`` block comments are skipped like white space.

    Three positions are tracked, each as byte offset, file index, line and
    column:

    * ``byteOfs`` / ``fileInx`` / ``lineNum`` / ``colNum`` -- the next
      character to be read,
    * ``char*`` -- the character currently held in ``self.ch``,
    * ``token*`` -- the first character of the current token.
    """

    # Token kinds stored in self.token.
    eos = 0         # end of source
    identifier = 1
    number = 2
    real = 3        # declared, but nextToken in this class never produces it
    character = 4
    string = 5
    separator = 6

    # Single-letter escapes recognised after a backslash in a literal.
    _simpleEscapes = {
        'a': '\a',
        'b': '\b',
        'f': '\f',
        'n': '\n',
        'r': '\r',
        't': '\t',
        'v': '\v',
    }

    def __init__(self):
        """Create a lexer over an empty source (the first token is ``eos``)."""
        self.source = ""
        self.reset()

    def reset(self):
        """Restart scanning of ``self.source`` and read the first token."""
        # Position of the next character to read.
        self.byteOfs = 0  # byte offset (from 0)
        self.fileInx = 0  # file index (never changed by this class)
        self.lineNum = 1  # line number (from 1)
        self.colNum = 1   # column number (from 1)

        # Position of the character held in self.ch.
        self.charByteOfs = 0
        self.charFileInx = 0
        self.charLineNum = 1
        self.charColNum = 1

        # Position of the first character of the token held in self.token.
        self.tokenByteOfs = 0
        self.tokenFileInx = 0
        self.tokenLineNum = 1
        self.tokenColNum = 1

        self.ch = 0
        self.token = self.eos
        self.tokenText = ""

        self.sourceLen = len(self.source)
        self.nextChar()
        self.nextToken()

    # -----------------------------------------------------------------------

    def openFile(self, fileName):
        """Read the whole text file ``fileName`` and start scanning it."""
        # The context manager closes the file even if read() fails.
        with open(fileName, "r") as f:
            self.source = f.read()
        self.reset()

    def openString(self, sourceText):
        """Start scanning the string ``sourceText``."""
        self.source = sourceText
        self.reset()

    # -----------------------------------------------------------------------

    def error(self, text):
        """Raise ``Exception`` with ``text`` plus the next-character line and column."""
        raise Exception(text + ", line " + str(self.lineNum) + ", column " + str(self.colNum))

    # -----------------------------------------------------------------------

    def nextChar(self):
        """Advance ``self.ch`` to the next source character (``0`` at the end)."""
        # The position that was "next" becomes the position of self.ch.
        self.charByteOfs = self.byteOfs
        self.charFileInx = self.fileInx
        self.charLineNum = self.lineNum
        self.charColNum = self.colNum

        if self.byteOfs < self.sourceLen:
            self.ch = self.source[self.byteOfs]
            self.byteOfs = self.byteOfs + 1
            if self.ch == '\n':
                self.lineNum = self.lineNum + 1
                self.colNum = 1  # columns restart on every new line
            elif self.ch != '\r':
                self.colNum = self.colNum + 1
        else:
            self.ch = 0

    def mark(self):
        """Return the position of the current token, suitable for ``rewind``."""
        return (self.tokenByteOfs, self.tokenFileInx, self.tokenLineNum, self.tokenColNum)

    def rewind(self, params):
        """Go back to a position returned by ``mark`` and re-read that token."""
        self.byteOfs = params[0]
        self.fileInx = params[1]
        self.lineNum = params[2]
        self.colNum = params[3]
        self.nextChar()
        self.nextToken()

    # -----------------------------------------------------------------------

    # The predicates below reject the end-of-source sentinel 0 explicitly:
    # ordering comparisons between int and str raise TypeError on Python 3.

    def isLetter(self, c):
        """Return True if ``c`` is an ASCII letter or an underscore."""
        return c != 0 and ('A' <= c <= 'Z' or 'a' <= c <= 'z' or c == '_')

    def isDigit(self, c):
        """Return True if ``c`` is a decimal digit."""
        return c != 0 and '0' <= c <= '9'

    def isHexDigit(self, c):
        """Return True if ``c`` is a hexadecimal digit."""
        return c != 0 and ('A' <= c <= 'F' or 'a' <= c <= 'f' or '0' <= c <= '9')

    # -----------------------------------------------------------------------

    def comment(self, mark1):
        """Skip characters up to and including the terminator ``mark1``.

        Raises ``Exception`` if the source ends first.
        """
        while self.ch != 0 and self.ch != mark1:
            self.nextChar()
        if self.ch == mark1:
            self.nextChar()  # skip mark1
        else:
            self.error("Unterminated comment")

    def comment2(self, mark1, mark2):
        """Skip characters up to and including the pair ``mark1`` ``mark2``.

        Raises ``Exception`` if the source ends first.
        """
        prev = ' '
        while self.ch != 0 and not (prev == mark1 and self.ch == mark2):
            prev = self.ch
            self.nextChar()
        if self.ch == mark2:
            self.nextChar()  # skip mark2
        else:
            self.error("Unterminated comment")

    def checkDigit(self):
        """Raise ``Exception`` unless the current character is a decimal digit."""
        if not self.isDigit(self.ch):
            self.error("Digit expected")

    def digits(self):
        """Append the run of decimal digits at the current position to ``tokenText``."""
        start = self.charByteOfs
        while self.isDigit(self.ch):
            self.nextChar()
        self.tokenText = self.tokenText + self.source[start:self.charByteOfs]

    def stringChar(self):
        """Consume one (possibly escaped) literal character and return it.

        Handles octal escapes of up to three digits, ``\\x`` hexadecimal
        escapes (truncated to the low 8 bits) and the single-letter escapes
        ``a b f n r t v``; any other escaped character stands for itself.
        Raises ``Exception`` if the source ends right after a backslash.
        """
        if self.ch != '\\':
            c = self.ch
            self.nextChar()
            return c

        self.nextChar()  # skip backslash

        if self.ch == 0:
            self.error("Unterminated string")

        if '0' <= self.ch <= '7':
            code = 0
            count = 0
            while count < 3 and self.ch != 0 and '0' <= self.ch <= '7':
                code = 8 * code + ord(self.ch) - ord('0')
                count = count + 1
                self.nextChar()
            return chr(code)

        if self.ch == 'x' or self.ch == 'X':
            self.nextChar()  # skip 'x'
            code = 0
            while self.isHexDigit(self.ch):
                code = 16 * code + int(self.ch, 16)
                self.nextChar()
            return chr(code & 0xFF)  # keep the low byte only

        # Known single-letter escape, otherwise the character itself
        # (covers \' \" \? \\ and anything unrecognised).
        c = self._simpleEscapes.get(self.ch, self.ch)
        self.nextChar()
        return c

    def backStep(self):
        """Move the recorded token position one character to the left."""
        self.tokenByteOfs = self.tokenByteOfs - 1
        self.tokenColNum = self.tokenColNum - 1

    # -----------------------------------------------------------------------

    def _skipBlanks(self):
        """Skip white space and comments.

        Returns True when a '/' that does not start a comment has been
        consumed, in which case ``self.ch`` is already the character after it.
        """
        while True:
            while self.ch != 0 and self.ch <= ' ':
                self.nextChar()
            if self.ch != '/':
                return False
            self.nextChar()  # skip '/'
            if self.ch == '/':
                self.nextChar()  # skip second '/'
                while self.ch != 0 and self.ch != '\n' and self.ch != '\r':
                    self.nextChar()
            elif self.ch == '*':
                self.nextChar()  # skip '*'
                self.comment2('*', '/')
            else:
                return True  # lone '/', caller produces the token

    def _quotedText(self, quote):
        """Read a literal delimited by ``quote`` into ``tokenText``.

        Raises ``Exception`` if a line end or the end of the source is
        reached before the closing quote.
        """
        self.nextChar()  # skip opening quote
        parts = []
        while self.ch != 0 and self.ch != '\n' and self.ch != '\r' and self.ch != quote:
            parts.append(self.stringChar())
        self.tokenText = "".join(parts)
        if self.ch != quote:
            self.error("Unterminated string")
        self.nextChar()  # skip closing quote

    def nextToken(self):
        """Read the next token into ``self.token`` and ``self.tokenText``.

        Identifiers start with a letter or underscore and continue with
        letters, underscores and digits.  Numbers are runs of decimal digits.
        Character and string literals store their decoded content (without
        quotes).  Any other character becomes a one-character separator.
        """
        self.token = self.eos
        self.tokenText = ""

        slash = self._skipBlanks()

        self.tokenByteOfs = self.charByteOfs
        self.tokenFileInx = self.charFileInx
        self.tokenLineNum = self.charLineNum
        self.tokenColNum = self.charColNum

        if slash:
            self.token = self.separator
            self.tokenText = '/'
            # self.ch is already past the '/', so move the token start back.
            self.backStep()

        elif self.ch == 0:
            self.token = self.eos

        elif self.isLetter(self.ch):
            self.token = self.identifier
            start = self.charByteOfs
            while self.isLetter(self.ch) or self.isDigit(self.ch):
                self.nextChar()
            self.tokenText = self.source[start:self.charByteOfs]

        elif self.isDigit(self.ch):
            self.token = self.number
            self.digits()

        elif self.ch == '\'':
            self.token = self.character
            self._quotedText('\'')

        elif self.ch == '\"':
            self.token = self.string
            self._quotedText('\"')

        else:
            self.token = self.separator
            self.tokenText = self.ch
            self.nextChar()

    # -----------------------------------------------------------------------

    def isEndOfSource(self):
        """Return True if the current token is the end of the source."""
        return self.token == self.eos

    def isIdentifier(self):
        """Return True if the current token is an identifier."""
        return self.token == self.identifier

    def isNumber(self):
        """Return True if the current token is a number."""
        return self.token == self.number

    def isSeparator(self, value):
        """Return True if the current token is the separator ``value``."""
        return self.token == self.separator and self.tokenText == value

    def checkSeparator(self, value):
        """Require the separator ``value`` and advance; raise ``Exception`` otherwise."""
        if not self.isSeparator(value):
            self.error(value + " expected")
        self.nextToken()

    def isKeyword(self, value):
        """Return True if the current token is an identifier spelled ``value``."""
        return self.token == self.identifier and self.tokenText == value

    def checkKeyword(self, value):
        """Require the keyword ``value`` and advance; raise ``Exception`` otherwise."""
        if not self.isKeyword(value):
            self.error(value + " expected")
        self.nextToken()

# --------------------------------------------------------------------------

if __name__ == "__main__":
    # Demo driver: tokenize "input.txt" and print the text of every token,
    # one per line, until the end of the source is reached.
    lexer = Lexer()
    lexer.openFile("input.txt")

    while True:
        if lexer.isEndOfSource():
            break
        print(lexer.tokenText)
        lexer.nextToken()

# kate: indent-width 1; show-tabs true; replace-tabs true; remove-trailing-spaces all