# vim: set fileencoding=utf8
# tokenizer.py
#
#
# Created by Antonio on 2/10/08.
# Trinary Research Project: Digital logic simulator
# Update (02.17.2008) : Tokenizer will now identify integers.
#
  9. import sys
  10. from Keyword import *
  11. from Identifier import *
  12. from Token import *
  13. from Trits import *
  14. from Literal import *
  15. # tokenizer
  16. def removeWhiteSpace(infile):
  17. '''removeWhiteSpace: remove preceding white space in the buffer
  18. infile: file containing the chars to read
  19. return: False if no more
  20. valid chars are in the buffer or True if there are still valid
  21. chars in the buffer
  22. '''
  23. value = infile.read(1)
  24. while value and value.isspace():
  25. value = infile.read(1)
  26. if not value:
  27. return value
  28. else:
  29. return value
  30. def isKeyword(infile, value):
  31. '''isKeyword: identifies token as keyword or symbol
  32. infile: object file
  33. value: string to identify
  34. return: keyword or identifier
  35. '''
  36. infile.seek(infile.tell() - 1)
  37. if value in keywords: #string is a keyword
  38. return Keyword(value)
  39. else: #string is an identifier
  40. return Identifier(value)
  41. def tokenizeVector(infile, value):
  42. '''tokenizeVector: find the next trit vector in the file
  43. infile: object file
  44. value: current value of trit vector
  45. return: Trit object containing the vector
  46. '''
  47. next = infile.read(1)
  48. if not next:
  49. raise "EOF file before end of vector."
  50. if next in trit_char:
  51. value = value + next
  52. return tokenizeVector(infile, value)
  53. elif next == "\"":
  54. return Trits(value)
  55. else:
  56. raise "Invalid symbol detected: |%s|" % (next, )
  57. def tokenizeTrit(infile):
  58. '''tokenizeTrit: find the next trit or trit vector in the file
  59. infile: object file
  60. return: Trit object containing the trit
  61. '''
  62. next = infile.read(1)
  63. assert next in trit_char
  64. trit = Trits(next)
  65. next = infile.read(1)
  66. assert next == "'"
  67. return trit
  68. def tokenizeString(infile, value):
  69. '''tokenizeString: find the next keyword or identifier in the file
  70. infile: object file
  71. value: current value of the keyword/identifier
  72. return: string containing the keyword/identifier
  73. '''
  74. next = infile.read(1)
  75. if next.isalnum():
  76. value = value + next
  77. return tokenizeString(infile, value)
  78. else:
  79. return isKeyword(infile, value)
  80. def tokenizeNumber(infile, value):
  81. '''tokenizeNumber: identify the next integer in the file
  82. '''
  83. next = infile.read(1)
  84. if next.isdigit():
  85. value = value + next
  86. return tokenizeNumber(infile, value)
  87. else:
  88. infile.seek(infile.tell() - 1)
  89. return Literal(str(value))
  90. def nextToken(infile):
  91. '''nextToken: read the next token from the given file
  92. infile: reference to file
  93. return: next token in the file: False if no more tokens, else True.
  94. '''
  95. value = removeWhiteSpace(infile)
  96. if value is None or len(value) == 0: # None if no more tokens
  97. return None
  98. elif value == "'":
  99. return tokenizeTrit(infile) # returns a Trit
  100. elif value == "\"":
  101. return tokenizeVector(infile, "") # returns a Trit vector
  102. elif value.isalpha():
  103. return tokenizeString(infile, value) # returns an Identifier
  104. elif value.isdigit():
  105. return tokenizeNumber(infile, value) # returns a Literal
  106. elif value in symbols:
  107. return Token(value)
  108. else: #invalid symbol detected
  109. raise "Invalid symbol detected: |%s|" % (value, )
  110. if __name__ == "__main__":
  111. f = file("testParser", "r")#sys.stdin
  112. while True:
  113. token = nextToken(f)
  114. print token
  115. if token is None:
  116. break