parse-bytecode.scm 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. ;; Generic byte code parser
  2. (define-record-type attribution :attribution
  3. (make-attribution init-template template-literal
  4. opcode-table make-label at-label)
  5. attribution?
  6. (init-template attribution-init-template)
  7. (template-literal attribution-template-literal)
  8. (opcode-table attribution-opcode-table)
  9. (make-label attribution-make-label)
  10. (at-label attribution-at-label))
  11. (define (opcode-table-ref table i)
  12. (vector-ref table i))
  13. (define (opcode-table-set! table i new)
  14. (vector-set! table i new))
  15. (define (make-opcode-table default)
  16. (make-vector op-count default))
  17. ;; Example attribution
  18. (define (disass)
  19. (define (disass-init-template state template p-args push-template? push-env? push-closure?)
  20. (cons (list 0 'protocol p-args push-template? push-env? push-closure?)
  21. state))
  22. (define instruction-set-table
  23. (make-opcode-table
  24. (lambda (opcode template state pc len . args)
  25. (cons `(,pc ,(enumerand->name opcode op) ,@(map cdr args)) state))))
  26. (define (attribute-literal literal i state)
  27. state)
  28. (define (make-label target-pc)
  29. target-pc)
  30. (define (at-label label state)
  31. (cons `(,label :) state))
  32. (make-attribution disass-init-template attribute-literal
  33. instruction-set-table make-label at-label))
  34. (define (parse-template x state attribution)
  35. (let* ((tem (coerce-to-template x))
  36. (template-len (template-length tem)))
  37. (let lp ((i 1) (state state))
  38. (if (= i template-len)
  39. (parse-template-code tem (template-code tem) state attribution)
  40. (let ((literal (template-ref tem i)))
  41. (if (template? literal)
  42. (lp (+ i 1) (parse-template literal state attribution))
  43. (lp (+ i 1) ((attribution-template-literal attribution) literal i state))))))))
  44. (define (byte-code? x)
  45. (let ((code (template-code (coerce-to-template x))))
  46. (define (byte-code-protocol? protocol)
  47. (or (<= protocol maximum-stack-args)
  48. (= protocol two-byte-nargs-protocol)
  49. (= protocol two-byte-nargs+list-protocol)
  50. (= protocol ignore-values-protocol)
  51. (= protocol call-with-values-protocol)
  52. (= protocol args+nargs-protocol)
  53. (= protocol nary-dispatch-protocol)
  54. (and (= protocol big-stack-protocol)
  55. (byte-code-protocol?
  56. (code-vector-ref code (- (code-vector-length code) 3))))))
  57. (byte-code-protocol? (code-vector-ref code 1))))
  58. (define (parse-template-code tem code state attribution)
  59. (with-template
  60. tem code state attribution
  61. (lambda (pc length state)
  62. (let loop ((pc pc)
  63. (state state))
  64. (if (< pc length)
  65. (receive (size state)
  66. (parse-instruction tem code pc state attribution)
  67. (loop (+ pc size) state))
  68. state)))))
  69. (define (with-template tem code state attribution fun)
  70. (let ((length (template-code-length code)))
  71. (let-fluid
  72. *bc-make-labels* '()
  73. (lambda ()
  74. (for-each
  75. (lambda (pc) (pc->label pc attribution))
  76. (debug-data-jump-back-dests (template-debug-data tem)))
  77. (receive (size protocol-arguments)
  78. (parse-protocol code 1 attribution)
  79. (receive (push-template? push-env? push-closure?)
  80. (case (code-vector-ref code (+ size 1))
  81. ((#b000) (values #f #f #f))
  82. ((#b001) (values #t #f #f))
  83. ((#b010) (values #f #t #f))
  84. ((#b011) (values #t #t #f))
  85. ((#b100) (values #f #f #t))
  86. ((#b110) (values #f #t #t))
  87. ((#b101) (values #t #f #t))
  88. ((#b111) (values #t #t #t))
  89. (else (assertion-violation 'with-template "invalid init-template spec"
  90. (code-vector-ref code (+ size 1)))))
  91. (fun (+ size 2)
  92. length
  93. ((attribution-init-template attribution)
  94. state tem protocol-arguments push-template? push-env? push-closure?))))))))
  95. (define (parse-instruction template code pc state attribution)
  96. (let* ((opcode (code-vector-ref code pc))
  97. (len.rev-args (cond ((= opcode (enum op computed-goto)) ; unused?
  98. (assertion-violation 'parse-instruction
  99. "computed-goto in parse-bytecode"))
  100. (else
  101. (parse-opcode-args opcode
  102. pc
  103. code
  104. template
  105. attribution))))
  106. (total-len (+ 1 (car len.rev-args)))) ; 1 for the opcode
  107. (values total-len
  108. (really-parse-instruction pc total-len opcode template state
  109. (reverse (cdr len.rev-args)) attribution))))
  110. (define (really-parse-instruction pc len opcode template state args attribution)
  111. (let ((new-state (if (label-at-pc? pc)
  112. ((attribution-at-label attribution)
  113. (pc->label pc attribution)
  114. state)
  115. state)))
  116. (let ((opcode-attribution
  117. (opcode-table-ref (attribution-opcode-table attribution) opcode)))
  118. (if opcode-attribution
  119. (apply opcode-attribution opcode template new-state pc len args)
  120. (assertion-violation 'parse-instruction "cannot attribute "
  121. (enumerand->name opcode op) args)))))
  122. ;;--------------------
  123. ;; labels
  124. (define *bc-make-labels* (make-fluid '()))
  125. (define (add-pc! pc attribution)
  126. (set-fluid! *bc-make-labels*
  127. (cons (cons pc ((attribution-make-label attribution) pc))
  128. (fluid *bc-make-labels*))))
  129. (define (pc->label pc attribution)
  130. (let ((maybe-pc.label (assq pc (fluid *bc-make-labels*))))
  131. (if maybe-pc.label
  132. (cdr maybe-pc.label)
  133. (begin
  134. (add-pc! pc attribution)
  135. (pc->label pc attribution)))))
  136. (define (label-at-pc? pc)
  137. (if (assq pc (fluid *bc-make-labels*)) #t #f))
  138. ; (enum op make-[big-]flat-env)
  139. ; number of vars
  140. ; number of closures
  141. ; [offset of template in frame
  142. ; offsets of templates in template]
  143. ; number of variables in frame (size)
  144. ; offsets of vars in frame
  145. ; [offset of env in frame
  146. ; number of vars in env
  147. ; offsets of vars in level]*
  148. (define-record-type env-data :env-data
  149. (make-env-data total-count frame-offsets maybe-template-index closure-offsets
  150. env-offsets)
  151. env-data?
  152. (total-count env-data-total-count)
  153. (frame-offsets env-data-frame-offsets)
  154. (maybe-template-index env-data-maybe-template-index)
  155. (closure-offsets env-data-closure-offsets)
  156. (env-offsets env-data-env-offsets))
  157. (define (parse-flat-env-args pc code size fetch)
  158. (let ((start-pc pc)
  159. (total-count (fetch code pc))
  160. (closure-count (fetch code (+ pc size))))
  161. (receive (template-index closure-offsets)
  162. (if (< 0 closure-count)
  163. (values (fetch code (+ pc size size))
  164. (get-offsets code (+ pc size size size)
  165. size fetch closure-count))
  166. (values #f '()))
  167. (let* ((pc (if (< 0 closure-count)
  168. (+ pc
  169. (* 2 size) ; counts
  170. size ; template offset
  171. (* closure-count size)) ; subtemplates
  172. (+ pc (* 2 size)))) ; counts
  173. (frame-count (fetch code pc))
  174. (pc (+ pc size)))
  175. (let ((frame-offsets (get-offsets code pc size fetch frame-count)))
  176. (let ((pc (+ pc (* frame-count size)))
  177. (count (+ closure-count frame-count)))
  178. (let loop ((pc pc) (count count) (rev-env-offsets '()))
  179. (if (= count total-count)
  180. (values (- pc start-pc)
  181. (make-env-data total-count frame-offsets
  182. template-index closure-offsets
  183. (reverse rev-env-offsets)))
  184. (let* ((env (fetch code pc))
  185. (count-here (fetch code (+ pc size)))
  186. (indexes (get-offsets code
  187. (+ pc size size)
  188. size
  189. fetch
  190. count-here)))
  191. (loop (+ pc (* (+ 2 count-here) size))
  192. (+ count count-here)
  193. (cons (cons env indexes) rev-env-offsets)))))))))))
  194. (define (get-offsets code pc size fetch count)
  195. (do ((pc pc (+ pc size))
  196. (i 0 (+ i 1))
  197. (r '() (cons (fetch code pc) r)))
  198. ((= i count)
  199. (reverse r))))
  200. ; Parse a protocol, returning the number of bytes of instruction stream that
  201. ; were consumed. PC has to point behind the PRTOCOL opcode
  202. (define (parse-protocol code pc attribution)
  203. (let ((protocol (code-vector-ref code pc)))
  204. (really-parse-protocol protocol code pc attribution)))
  205. (define (really-parse-protocol protocol code pc attribution)
  206. (cond ((<= protocol maximum-stack-args)
  207. (values 1 (list protocol)))
  208. ((= protocol two-byte-nargs-protocol)
  209. (values 3 (list protocol (get-offset code (+ pc 1)))))
  210. ((= protocol two-byte-nargs+list-protocol)
  211. (values 3 (list protocol (get-offset code (+ pc 1)))))
  212. ((= protocol ignore-values-protocol)
  213. (values 1 (list protocol)))
  214. ((= protocol call-with-values-protocol)
  215. (let ((offset (get-offset code (+ pc 1))))
  216. (values 3 (list protocol
  217. (pc->label (- (+ offset pc) 1)
  218. attribution)
  219. (zero? offset)))))
  220. ((= protocol args+nargs-protocol)
  221. (values 2 (list protocol (code-vector-ref code (+ pc 1)))))
  222. ((= protocol nary-dispatch-protocol)
  223. (values 5 (cons protocol (parse-dispatch code pc attribution))))
  224. ((= protocol big-stack-protocol)
  225. (let ((real-protocol (code-vector-ref code
  226. (- (code-vector-length code) 3)))
  227. (stack-size (get-offset code (- (code-vector-length code) 2))))
  228. (receive (size real-attribution)
  229. (really-parse-protocol real-protocol code pc attribution)
  230. (values size
  231. (list protocol real-attribution stack-size)))))
  232. (else
  233. (assertion-violation 'parse-protocol "unknown protocol" protocol pc))))
  234. (define (parse-dispatch code pc attribution)
  235. (define (maybe-parse-one-dispatch index)
  236. (let ((offset (code-vector-ref code (+ pc index))))
  237. (if (= offset 0)
  238. #f
  239. (pc->label (+ offset pc) attribution))))
  240. (map maybe-parse-one-dispatch (list 3 4 5 2)))
  241. (define (protocol-protocol p-args)
  242. (car p-args))
  243. (define (n-ary-protocol? p-args)
  244. (let ((protocol (car p-args)))
  245. (if (or (= protocol two-byte-nargs+list-protocol)
  246. (= protocol call-with-values-protocol)
  247. (= protocol ignore-values-protocol))
  248. #t
  249. (if (or (<= protocol maximum-stack-args)
  250. (= protocol two-byte-nargs-protocol))
  251. #f
  252. (if (= protocol big-stack-protocol)
  253. (n-ary-protocol? (cadr p-args))
  254. (assertion-violation 'n-ary-protocol?
  255. "unknown protocol" p-args))))))
  256. (define (protocol-nargs p-args)
  257. (let ((protocol (car p-args)))
  258. (cond ((<= protocol maximum-stack-args)
  259. protocol)
  260. ((= protocol two-byte-nargs-protocol)
  261. (cadr p-args))
  262. ((= protocol two-byte-nargs+list-protocol)
  263. (cadr p-args))
  264. ((= protocol args+nargs-protocol)
  265. (cadr p-args))
  266. ((= protocol big-stack-protocol)
  267. (protocol-nargs (cadr p-args)))
  268. ((= protocol ignore-values-protocol)
  269. 0)
  270. ((= protocol call-with-values-protocol)
  271. (assertion-violation 'protocol-nargs
  272. "call-with-values-protocol in protocol-nargs"))
  273. (else
  274. (assertion-violation 'protocol-nargs
  275. "unknown protocol" p-args)))))
  276. (define (protocol-cwv-tailcall? p-args)
  277. (let ((protocol (protocol-protocol p-args)))
  278. (if (not (= protocol call-with-values-protocol))
  279. (assertion-violation 'protocol-cwv-tailcall?
  280. "invalid protocol" protocol))
  281. (caddr p-args)))
  282. (define (call-with-values-protocol-target p-args)
  283. (let ((protocol (protocol-protocol p-args)))
  284. (if (not (= protocol call-with-values-protocol))
  285. (assertion-violation 'call-with-values-protocol-target
  286. "invalid protocol" protocol))
  287. (cadr p-args)))
  288. ; Generic opcode argument parser
  289. (define (parse-opcode-args op start-pc code template attribution)
  290. (let ((specs (vector-ref opcode-arg-specs op)))
  291. (let loop ((specs specs) (pc (+ start-pc 1)) (len 0) (args '()))
  292. (if (null? specs)
  293. (cons len args)
  294. (let ((spec (car specs)))
  295. (cond
  296. ((eq? spec 'protocol)
  297. (receive (size p-args)
  298. (parse-protocol code pc attribution)
  299. (loop (cdr specs)
  300. (+ pc size)
  301. (+ len size)
  302. (cons (cons 'protocol p-args) args))))
  303. ((or (eq? spec 'env-data)
  304. (eq? spec 'big-env-data))
  305. (receive (size env-data)
  306. (receive (slot-size fetch)
  307. (if (eq? spec 'env-data)
  308. (values 1 code-vector-ref)
  309. (values 2 get-offset))
  310. (parse-flat-env-args pc code slot-size fetch))
  311. (loop (cdr specs)
  312. (+ pc size)
  313. (+ len size)
  314. (cons (cons 'env-data env-data) args))))
  315. ((eq? spec 'instr)
  316. (let ((opcode (code-vector-ref code pc)))
  317. (let ((len.revargs (parse-opcode-args opcode
  318. pc
  319. code
  320. template
  321. attribution)))
  322. (loop (cdr specs)
  323. (+ pc 1 (car len.revargs))
  324. (+ len 1 (car len.revargs))
  325. (cons
  326. (cons 'instr
  327. (cons opcode (reverse (cdr len.revargs))))
  328. args)))))
  329. ((= 0 (arg-spec-size spec pc code))
  330. (cons len args))
  331. (else
  332. (let ((arg (parse-opcode-arg specs
  333. pc
  334. start-pc
  335. code
  336. template
  337. attribution)))
  338. (loop (cdr specs)
  339. (+ pc (arg-spec-size spec pc code))
  340. (+ len (arg-spec-size spec pc code))
  341. (cons arg args))))))))))
  342. ; The number of bytes required by an argument.
  343. (define (arg-spec-size spec pc code)
  344. (case spec
  345. ((byte nargs stack-index index literal stob) 1)
  346. ((two-bytes two-byte-nargs two-byte-stack-index two-byte-index offset offset-) 2)
  347. ((env-data) (assertion-violation 'arg-spec-size "env-data in arg-spec-size"))
  348. ((protocol) (assertion-violation 'arg-spec-size "protocol in arg-spec-size"))
  349. ((moves-data)
  350. (let ((n-moves (code-vector-ref code pc)))
  351. (+ 1 (* 2 n-moves))))
  352. ((big-moves-data)
  353. (let ((n-moves (code-vector-ref code pc)))
  354. (+ 2 (* 4 n-moves))))
  355. ((cont-data)
  356. (- (get-offset code pc) 1)) ; size includes opcode
  357. (else 0)))
  358. ; Parse the particular type of argument.
  359. (define (parse-opcode-arg specs pc start-pc code template attribution)
  360. (cons
  361. (car specs)
  362. (case (car specs)
  363. ((byte nargs stack-index index)
  364. (code-vector-ref code pc))
  365. ((two-bytes two-byte-nargs two-byte-stack-index two-byte-index)
  366. (get-offset code pc))
  367. ((literal)
  368. (- (code-vector-ref code pc) 128))
  369. ((offset)
  370. (let ((offset (get-offset code pc)))
  371. (if (zero? offset)
  372. #f
  373. (pc->label (+ start-pc offset) attribution))))
  374. ((offset-)
  375. (pc->label (- start-pc (get-offset code pc)) attribution))
  376. ((stob)
  377. (code-vector-ref code pc))
  378. ((cont-data)
  379. (parse-cont-data-args pc code template attribution))
  380. ((moves-data)
  381. (let ((n-moves (code-vector-ref code pc)))
  382. (let loop ((offset (+ pc 1))
  383. (n n-moves))
  384. (if (zero? n)
  385. '()
  386. (cons (cons (code-vector-ref code offset)
  387. (code-vector-ref code (+ offset 1)))
  388. (loop (+ offset 2) (- n 1)))))))
  389. ((big-moves-data)
  390. (let ((n-moves (get-offset code pc)))
  391. (let loop ((offset (+ pc 2))
  392. (n n-moves))
  393. (if (zero? n)
  394. '()
  395. (cons (cons (get-offset code offset)
  396. (get-offset code (+ offset 2)))
  397. (loop (+ offset 4) (- n 1)))))))
  398. (else (assertion-violation 'parse-opcode-arg
  399. "unknown arg spec: " (car specs))))))
  400. (define-record-type cont-data :cont-data
  401. (make-cont-data length mask-bytes live-offsets template pc gc-mask-size depth)
  402. cont-data?
  403. (length cont-data-length)
  404. (mask-bytes cont-data-mask-bytes)
  405. ;; #f if all are live
  406. (live-offsets cont-data-live-offsets)
  407. (template cont-data-template)
  408. (pc cont-data-pc)
  409. (gc-mask-size cont-data-gc-mask-size)
  410. (depth cont-data-depth))
  411. (define (parse-cont-data-args pc code template attribution)
  412. (let* ((len (get-offset code pc))
  413. (end-pc (- (+ pc len) 1)) ; len includes opcode
  414. (gc-mask-size (code-vector-ref code (- end-pc 3)))
  415. (depth (get-offset code (- end-pc 2)))
  416. (offset (get-offset code (- end-pc 5)))
  417. (template (get-offset code (- end-pc 7)))
  418. (mask-bytes
  419. (let lp ((the-pc (+ pc 2)) (mask-bytes '()))
  420. (if (>= the-pc (+ pc 2 gc-mask-size))
  421. mask-bytes
  422. (lp (+ the-pc 1)
  423. (cons (code-vector-ref code the-pc) mask-bytes)))))
  424. (live-offsets
  425. (and (not (zero? gc-mask-size))
  426. (gc-mask-live-offsets (bytes->bits mask-bytes)))))
  427. (make-cont-data len
  428. mask-bytes
  429. live-offsets
  430. template
  431. (pc->label offset attribution)
  432. gc-mask-size
  433. depth)))
  434. (define (bytes->bits l)
  435. (let loop ((n 0) (l l))
  436. (if (null? l)
  437. n
  438. (loop (+ (arithmetic-shift n 8) (car l))
  439. (cdr l)))))
  440. (define (gc-mask-live-offsets mask)
  441. (let loop ((mask mask) (i 0) (l '()))
  442. (if (zero? mask)
  443. (reverse l)
  444. (loop (arithmetic-shift mask -1) (+ 1 i)
  445. (if (odd? mask)
  446. (cons i l)
  447. l)))))
  448. ;----------------
  449. ; Utilities.
  450. ; TODO: Put the template-related stuff into a separate module?
  451. ; Turn OBJ into a template, if possible.
  452. (define (coerce-to-template obj)
  453. (cond ((template? obj) obj)
  454. ((closure? obj) (closure-template obj))
  455. ((continuation? obj) (continuation-template obj))
  456. (else (assertion-violation 'coerce-to-template
  457. "expected a procedure or continuation" obj))))
  458. (define (template-code-length code)
  459. (if (and (= (enum op protocol)
  460. (code-vector-ref code 0))
  461. (= big-stack-protocol
  462. (code-vector-ref code 1)))
  463. (- (code-vector-length code) 3)
  464. (code-vector-length code)))
  465. ; Fetch the two-byte value at PC in CODE.
  466. (define (get-offset code pc)
  467. (+ (* (code-vector-ref code pc)
  468. byte-limit)
  469. (code-vector-ref code (+ pc 1))))