2 Commits 56fe72b2b9 ... 1e34257333

Author SHA1 Message Date
  Pedro Gimeno 1e34257333 Remove stray line, minor changes 1 year ago
  Pedro Gimeno 5835d71d54 Clarify the uint regex and comments 1 year ago
1 changed files with 10 additions and 13 deletions
  1. 10 13
      asc2cld.py

+ 10 - 13
asc2cld.py

@@ -240,7 +240,7 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
   # Escape for RE
   L = [re.escape(v) for v in L]
 
-  # Tokenizer for decimal numbers.
+  # Tokenizer for decimal numbers (floats).
   decimal = (
       br'|(?P<dec>(?:'
         # Cautiously avoid matching 1 space alone or 1 trailing space.
@@ -250,18 +250,18 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
       br'(?:'
         br'\ *[%!\#]'                              # suffix
         br'|\ *E\ *[-+]?(?:[0-9\ ]*[0-9])?'        # or exponent, but not both
-      br')?)'
+      br')?)'                                      # optional
   )
   # Tokenizer for uint: up to 5 digits if the first 4 are <= 6552, else up to 4.
   # Used for line numbers, either leading or after GOTO/GOSUB/etc.
   uint = (br'|(?P<dec>'
     br'(?:0[0\ ]*)?'                             # leading zeros prefix
     br'(?:0'                                     # zero
-    br'|[1-5](?:\ *[0-9]){,4}'                   # prefix 1..5999, 5 digits
-    br'|6\ *[0-4](?:\ *[0-9]){,3}'               # prefix 6000..6499, 5 digits
-    br'|6\ *5\ *[0-4](?:\ *[0-9]){,2}'           # prefix 6500..6549, 5 digits
+    br'|[1-5](?:\ *[0-9]){4}'                    # prefix 1..5, 5 digits
+    br'|6\ *[0-4](?:\ *[0-9]){3}'                # prefix 60..64, 5 digits
+    br'|6\ *5\ *[0-4](?:\ *[0-9]){2}'            # prefix 650..654, 5 digits
     br'|6\ *5\ *5\ *[0-2](?:\ *[0-9])?'          # prefix 6550..6552, 5 digits
-    br'|[6-9](?:\ *[0-9]){,3}'                   # rest, 4 digits
+    br'|[1-9](?:\ *[0-9]){,3}'                   # rest, 1 to 4 digits
     br'))'
   )
 
@@ -276,7 +276,7 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
     #br'|&B(?P<bin>[01]+)'  # binary numbers don't have tokens
     br'%s'                                    # decimal number
     br'|(?P<str>"(?:[^"]*)(?:"|$))'           # string literal
-     b'|(?P<del>[\x80-\xFF])'                 # remove these
+     b'|(?P<del>[\x80-\xFF])'                 # remove those
     br'|.'
     br')'
     )
@@ -306,9 +306,6 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
   trunc_at_null = re.compile(br'\x00.*', re.S)
   call_strip = re.compile(br'[^0-\x7F\ (]*')
 
-  # Compile the BASIC to a buffer
-  buf = io.BytesIO()
-
   # Truncate source at \x1A (^Z)
   src = re.sub(b'\x1A.*', b'', src, flags=re.S)
   if use_cr:
@@ -320,8 +317,6 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
     src = src.split(b'\n')
     ignore = b'\r'
 
-  filestart = buf.tell()
-
   # First pass: Read the lines and tokenize them into a dict with the line
   # number as the key. Handle line deletion, overwriting, etc.
   PRGLines = {}
@@ -357,7 +352,7 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
       del PRGLines[linenum]
       continue
 
-    lbuf = io.BytesIO()
+    lbuf = io.BytesIO()  # Tokenized line buffer
     tok_mode = ModeFloat
 
     while True:
@@ -464,6 +459,8 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
 
   # Second pass - Write remaining lines in order
   addr = 0x8001
+  buf = io.BytesIO()
+
   for linenum in sorted(PRGLines.keys()):
     line = PRGLines[linenum]
     addr += len(line) + 4