123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133 |
- # vim: set fileencoding=utf8
- # tokenizer.py
- #
- #
- # Created by Antonio on 2/10/08.
- # Trinary Research Project: Digital logic simulator
- # Update (02.17.2008) : Tokenizer will now identify integers.
- #
- import sys
- from Keyword import *
- from Identifier import *
- from Token import *
- from Trits import *
- from Literal import *
- # tokenizer
def removeWhiteSpace(infile):
    '''removeWhiteSpace: skip leading white space in the buffer
    infile: file object to read from, one character at a time
    return: the first non-whitespace character read, or the empty
            string if the end of the file is reached first
    '''
    value = infile.read(1)
    # read(1) yields "" at end of file, which also ends the loop.
    while value and value.isspace():
        value = infile.read(1)
    return value
-
def isKeyword(infile, value):
    '''isKeyword: classify a scanned word as a keyword or an identifier
    infile: file object the word was read from; it is always moved back
            by one character before the word is classified
    value: the word to classify
    return: Keyword(value) if value is in keywords, else Identifier(value)
    '''
    # Un-read one character. The caller is expected to have consumed
    # exactly one character past the end of the word.
    infile.seek(infile.tell() - 1)
    token_type = Keyword if value in keywords else Identifier
    return token_type(value)
def tokenizeVector(infile, value):
    '''tokenizeVector: read the rest of a trit vector from the file
    infile: file object to read the vector's characters from
    value: trit characters already collected for this vector
    return: Trits object built from value followed by every trit
            character read up to the closing double quote
    raise: ValueError if the file ends before the closing quote, or if
           a character that is neither in trit_char nor a double quote
           is read
    '''
    # Collect the characters in a list and loop instead of recursing, so
    # the length of a vector is not bounded by the recursion limit.
    collected = [value]
    while True:
        char = infile.read(1)
        if not char:  # read(1) returns "" at end of file
            raise ValueError("EOF file before end of vector.")
        if char in trit_char:
            collected.append(char)
        elif char == "\"":
            return Trits("".join(collected))
        else:
            raise ValueError("Invalid symbol detected: |%s|" % (char, ))
def tokenizeTrit(infile):
    '''tokenizeTrit: read a single quoted trit from the file
    infile: file object to read the trit and its closing quote from
    return: Trits object built from the one trit character
    raise: ValueError if the file ends before the trit, if the
           character read is not in trit_char, or if the character
           after it is not a single quote
    '''
    # Explicit raises are used rather than assert so that the checks are
    # still performed when Python runs with -O.
    char = infile.read(1)
    if not char:  # read(1) returns "" at end of file
        raise ValueError("EOF file before end of trit.")
    if char not in trit_char:
        raise ValueError("Invalid symbol detected: |%s|" % (char, ))
    trit = Trits(char)
    closing = infile.read(1)
    if closing != "'":
        raise ValueError("Invalid symbol detected: |%s|" % (closing, ))
    return trit
def tokenizeString(infile, value):
    '''tokenizeString: read the rest of a keyword or identifier
    infile: file object to read the remaining characters from
    value: characters of the word read so far
    return: Keyword object if the complete word is in keywords,
            otherwise an Identifier object
    '''
    # Loop instead of recursing so that long words cannot hit the
    # recursion limit.
    word = [value]
    char = infile.read(1)
    while char.isalnum():  # "" (end of file) is not alphanumeric
        word.append(char)
        char = infile.read(1)
    value = "".join(word)
    if char:
        # A delimiter was consumed: isKeyword moves the file back by one
        # character so the delimiter is read again by the next call.
        return isKeyword(infile, value)
    # End of file: nothing was consumed past the word, so the file must
    # not be moved back. Doing so would re-read the last character of the
    # word and the caller would never reach the end of the file.
    if value in keywords:
        return Keyword(value)
    return Identifier(value)
-
def tokenizeNumber(infile, value):
    '''tokenizeNumber: read the rest of an integer from the file
    infile: file object to read the remaining digits from
    value: digits of the integer read so far
    return: Literal object built from the complete digit string
    '''
    # Loop instead of recursing so that long numbers cannot hit the
    # recursion limit.
    digits = [str(value)]
    char = infile.read(1)
    while char.isdigit():  # "" (end of file) is not a digit
        digits.append(char)
        char = infile.read(1)
    if char:
        # Un-read the non-digit so the next call sees it. At end of file
        # nothing was consumed, and moving back would re-read the last
        # digit forever.
        infile.seek(infile.tell() - 1)
    return Literal("".join(digits))
def nextToken(infile):
    '''nextToken: read the next token from the given file
    infile: file object to read from
    return: the object produced for the next token (the result of
            tokenizeTrit, tokenizeVector, tokenizeString or
            tokenizeNumber, or a Token for a symbol), or None when
            there are no more tokens
    raise: ValueError if the next character cannot start any token
    '''
    value = removeWhiteSpace(infile)

    if not value:  # nothing but white space was left in the file
        return None
    if value == "'":
        return tokenizeTrit(infile)  # single trit
    if value == "\"":
        return tokenizeVector(infile, "")  # trit vector
    if value.isalpha():
        return tokenizeString(infile, value)  # keyword or identifier
    if value.isdigit():
        return tokenizeNumber(infile, value)  # integer literal
    if value in symbols:
        return Token(value)
    # A real exception is raised because raising a plain string is
    # itself a TypeError on Python 2.6 and later.
    raise ValueError("Invalid symbol detected: |%s|" % (value, ))
-
if __name__ == "__main__":
    # Debug driver: print every token found in the file "testParser",
    # including the final None that marks the end of the input.
    # sys.stdin could be used as the input instead.
    with open("testParser", "r") as f:
        while True:
            token = nextToken(f)
            print(token)
            if token is None:
                break
|