assembler.scm 93 KB


  1. ;;; Guile bytecode assembler
  2. ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013, 2014, 2015 Free Software Foundation, Inc.
  3. ;;;
  4. ;;; This library is free software; you can redistribute it and/or
  5. ;;; modify it under the terms of the GNU Lesser General Public
  6. ;;; License as published by the Free Software Foundation; either
  7. ;;; version 3 of the License, or (at your option) any later version.
  8. ;;;
  9. ;;; This library is distributed in the hope that it will be useful,
  10. ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ;;; Lesser General Public License for more details.
  13. ;;;
  14. ;;; You should have received a copy of the GNU Lesser General Public
  15. ;;; License along with this library; if not, write to the Free Software
  16. ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. ;;; Commentary:
  18. ;;;
  19. ;;; This module implements an assembler that creates an ELF image from
  20. ;;; bytecode assembly and macro-assembly. The input can be given in
  21. ;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
  22. ;;; procedural interface, the emit-OP procedures, but that is not
  23. ;;; currently exported.
  24. ;;;
  25. ;;; "Primitive instructions" correspond to VM operations. Assemblers
  26. ;;; for primitive instructions are generated programmatically from
  27. ;;; (instruction-list), which itself is derived from the VM sources.
  28. ;;; There are also "macro-instructions" like "label" or "load-constant"
  29. ;;; that expand to 0 or more primitive instructions.
  30. ;;;
  31. ;;; The assembler also handles some higher-level tasks, like creating
  32. ;;; the symbol table, other metadata sections, creating a constant table
  33. ;;; for the whole compilation unit, and writing the dynamic section of
  34. ;;; the ELF file along with the appropriate initialization routines.
  35. ;;;
  36. ;;; Most compilers will want to use the trio of make-assembler,
  37. ;;; emit-text, and link-assembly. That will result in the creation of
  38. ;;; an ELF image as a bytevector, which can then be loaded using
  39. ;;; load-thunk-from-memory, or written to disk as a .go file.
  40. ;;;
  41. ;;; Code:
  42. (define-module (system vm assembler)
  43. #:use-module (system base target)
  44. #:use-module (system vm dwarf)
  45. #:use-module (system vm elf)
  46. #:use-module (system vm linker)
  47. #:use-module (language bytecode)
  48. #:use-module (rnrs bytevectors)
  49. #:use-module (ice-9 binary-ports)
  50. #:use-module (ice-9 vlist)
  51. #:use-module (ice-9 match)
  52. #:use-module (srfi srfi-1)
  53. #:use-module (srfi srfi-4)
  54. #:use-module (srfi srfi-9)
  55. #:use-module (srfi srfi-11)
  56. #:export (make-assembler
  57. emit-call
  58. emit-call-label
  59. emit-tail-call
  60. emit-tail-call-label
  61. (emit-receive* . emit-receive)
  62. emit-receive-values
  63. emit-return
  64. emit-return-values
  65. emit-call/cc
  66. emit-abort
  67. (emit-builtin-ref* . emit-builtin-ref)
  68. emit-br-if-nargs-ne
  69. emit-br-if-nargs-lt
  70. emit-br-if-nargs-gt
  71. emit-assert-nargs-ee
  72. emit-assert-nargs-ge
  73. emit-assert-nargs-le
  74. emit-alloc-frame
  75. emit-reset-frame
  76. emit-assert-nargs-ee/locals
  77. emit-br-if-npos-gt
  78. emit-bind-kwargs
  79. emit-bind-rest
  80. emit-br
  81. emit-br-if-true
  82. emit-br-if-null
  83. emit-br-if-nil
  84. emit-br-if-pair
  85. emit-br-if-struct
  86. emit-br-if-char
  87. emit-br-if-tc7
  88. (emit-br-if-eq* . emit-br-if-eq)
  89. (emit-br-if-eqv* . emit-br-if-eqv)
  90. (emit-br-if-equal* . emit-br-if-equal)
  91. (emit-br-if-=* . emit-br-if-=)
  92. (emit-br-if-<* . emit-br-if-<)
  93. (emit-br-if-<=* . emit-br-if-<=)
  94. (emit-br-if-logtest* . emit-br-if-logtest)
  95. (emit-mov* . emit-mov)
  96. (emit-box* . emit-box)
  97. (emit-box-ref* . emit-box-ref)
  98. (emit-box-set!* . emit-box-set!)
  99. emit-make-closure
  100. (emit-free-ref* . emit-free-ref)
  101. (emit-free-set!* . emit-free-set!)
  102. emit-current-module
  103. emit-resolve
  104. (emit-define!* . emit-define!)
  105. emit-toplevel-box
  106. emit-module-box
  107. emit-prompt
  108. (emit-wind* . emit-wind)
  109. emit-unwind
  110. (emit-push-fluid* . emit-push-fluid)
  111. emit-pop-fluid
  112. (emit-fluid-ref* . emit-fluid-ref)
  113. (emit-fluid-set* . emit-fluid-set)
  114. (emit-string-length* . emit-string-length)
  115. (emit-string-ref* . emit-string-ref)
  116. (emit-string->number* . emit-string->number)
  117. (emit-string->symbol* . emit-string->symbol)
  118. (emit-symbol->keyword* . emit-symbol->keyword)
  119. (emit-cons* . emit-cons)
  120. (emit-car* . emit-car)
  121. (emit-cdr* . emit-cdr)
  122. (emit-set-car!* . emit-set-car!)
  123. (emit-set-cdr!* . emit-set-cdr!)
  124. (emit-add* . emit-add)
  125. (emit-add1* . emit-add1)
  126. (emit-sub* . emit-sub)
  127. (emit-sub1* . emit-sub1)
  128. (emit-mul* . emit-mul)
  129. (emit-div* . emit-div)
  130. (emit-quo* . emit-quo)
  131. (emit-rem* . emit-rem)
  132. (emit-mod* . emit-mod)
  133. (emit-ash* . emit-ash)
  134. (emit-logand* . emit-logand)
  135. (emit-logior* . emit-logior)
  136. (emit-logxor* . emit-logxor)
  137. (emit-make-vector* . emit-make-vector)
  138. (emit-make-vector/immediate* . emit-make-vector/immediate)
  139. (emit-vector-length* . emit-vector-length)
  140. (emit-vector-ref* . emit-vector-ref)
  141. (emit-vector-ref/immediate* . emit-vector-ref/immediate)
  142. (emit-vector-set!* . emit-vector-set!)
  143. (emit-vector-set!/immediate* . emit-vector-set!/immediate)
  144. (emit-struct-vtable* . emit-struct-vtable)
  145. (emit-allocate-struct/immediate* . emit-allocate-struct/immediate)
  146. (emit-struct-ref/immediate* . emit-struct-ref/immediate)
  147. (emit-struct-set!/immediate* . emit-struct-set!/immediate)
  148. (emit-allocate-struct* . emit-allocate-struct)
  149. (emit-struct-ref* . emit-struct-ref)
  150. (emit-struct-set!* . emit-struct-set!)
  151. (emit-class-of* . emit-class-of)
  152. (emit-make-array* . emit-make-array)
  153. (emit-bv-u8-ref* . emit-bv-u8-ref)
  154. (emit-bv-s8-ref* . emit-bv-s8-ref)
  155. (emit-bv-u16-ref* . emit-bv-u16-ref)
  156. (emit-bv-s16-ref* . emit-bv-s16-ref)
  157. (emit-bv-u32-ref* . emit-bv-u32-ref)
  158. (emit-bv-s32-ref* . emit-bv-s32-ref)
  159. (emit-bv-u64-ref* . emit-bv-u64-ref)
  160. (emit-bv-s64-ref* . emit-bv-s64-ref)
  161. (emit-bv-f32-ref* . emit-bv-f32-ref)
  162. (emit-bv-f64-ref* . emit-bv-f64-ref)
  163. (emit-bv-u8-set!* . emit-bv-u8-set!)
  164. (emit-bv-s8-set!* . emit-bv-s8-set!)
  165. (emit-bv-u16-set!* . emit-bv-u16-set!)
  166. (emit-bv-s16-set!* . emit-bv-s16-set!)
  167. (emit-bv-u32-set!* . emit-bv-u32-set!)
  168. (emit-bv-s32-set!* . emit-bv-s32-set!)
  169. (emit-bv-u64-set!* . emit-bv-u64-set!)
  170. (emit-bv-s64-set!* . emit-bv-s64-set!)
  171. (emit-bv-f32-set!* . emit-bv-f32-set!)
  172. (emit-bv-f64-set!* . emit-bv-f64-set!)
  173. emit-text
  174. link-assembly))
  175. ;; Like define-inlinable, but only for first-order uses of the defined
  176. ;; routine. Should residualize less code.
  ;; (define-inline (NAME ARG ...) BODY ...) defines NAME as a macro (via
  ;; define-syntax-rule) instead of a procedure, so NAME is only usable
  ;; in operator position.  Each operand is bound once, with `let', to
  ;; the corresponding ARG, so BODY sees ordinary variables and every
  ;; operand expression appears exactly once in the expansion.  Both this
  ;; macro and the definitions it generates are wrapped in
  ;; (eval-when (expand) ...).
  177. (eval-when (expand)
  178. (define-syntax define-inline
  179. (lambda (x)
  180. (syntax-case x ()
  181. ((_ (name arg ...) body ...)
  ;; One fresh pattern variable per formal; they become the pattern of
  ;; the generated syntax rule.
  182. (with-syntax (((temp ...) (generate-temporaries #'(arg ...))))
  183. #`(eval-when (expand)
  184. (define-syntax-rule (name temp ...)
  185. (let ((arg temp) ...)
  186. body ...)))))))))
  187. ;;; Bytecode consists of 32-bit units, often subdivided in some way.
  188. ;;; These helpers create one 32-bit unit from multiple components.
  (define-inline (pack-u8-u24 x y)
    ;; Build one 32-bit unit with X in bits 0-7 and Y shifted in above
    ;; it.  Only X is range-checked here.
    (if (<= 0 x 255)
        (logior (ash y 8) x)
        (error "out of range" x)))
  (define-inline (pack-u8-s24 x y)
    ;; Build one 32-bit unit with X in bits 0-7 and the 24-bit field Y in
    ;; bits 8-31.  Negative Y is stored in two's complement by adding
    ;; 2^24.  The negative branch now includes -#x800000, the smallest
    ;; signed 24-bit value, which the previous test (< 0 (- y) #x800000)
    ;; rejected by one.  Non-negative values up to #xffffff are still
    ;; accepted unchanged, as before.
    (unless (<= 0 x 255)
      (error "out of range" x))
    (logior x (ash (cond
                    ((<= (- #x800000) y -1)
                     (+ y #x1000000))
                    ((<= 0 y #xffffff)
                     y)
                    (else (error "out of range" y)))
                   8)))
  (define-inline (pack-u1-u7-u24 x y z)
    ;; Bit 0 holds X, bits 1-7 hold Y, and Z is shifted in from bit 8.
    ;; X and Y are range-checked, in that order; Z is not.
    (cond
     ((not (<= 0 x 1)) (error "out of range" x))
     ((not (<= 0 y 127)) (error "out of range" y))
     (else (logior (ash z 8) (ash y 1) x))))
  (define-inline (pack-u8-u12-u12 x y z)
    ;; Bits 0-7 hold X, bits 8-19 hold Y, and Z is shifted in from bit 20.
    ;; X and Y are range-checked, in that order; Z is not.
    (cond
     ((not (<= 0 x 255)) (error "out of range" x))
     ((not (<= 0 y 4095)) (error "out of range" y))
     (else (logior (ash z 20) (ash y 8) x))))
  (define-inline (pack-u8-u8-u16 x y z)
    ;; Bits 0-7 hold X, bits 8-15 hold Y, and Z is shifted in from bit 16.
    ;; X and Y are range-checked, in that order; Z is not.
    (cond
     ((not (<= 0 x 255)) (error "out of range" x))
     ((not (<= 0 y 255)) (error "out of range" y))
     (else (logior (ash z 16) (ash y 8) x))))
  (define-inline (pack-u8-u8-u8-u8 x y z w)
    ;; Four byte-sized fields, X lowest and W highest.  X, Y and Z are
    ;; range-checked, in that order; W is not.
    (cond
     ((not (<= 0 x 255)) (error "out of range" x))
     ((not (<= 0 y 255)) (error "out of range" y))
     ((not (<= 0 z 255)) (error "out of range" z))
     (else (logior (ash w 24) (ash z 16) (ash y 8) x))))
  (eval-when (expand)
    ;; Turn boolean flags into a small bitfield: the first flag
    ;; contributes bit 0, the second contributes bit 1.
    (define-syntax pack-flags
      (syntax-rules ()
        ;; Add clauses as needed.
        ((_ first second)
         (logior (if first #b01 0)
                 (if second #b10 0))))))
  235. ;;; Helpers to read and write 32-bit units in a buffer.
  (define-inline (u32-ref buf n)
    ;; N is an index in 32-bit units; scale it to a byte offset.
    (let ((byte-offset (* 4 n)))
      (bytevector-u32-native-ref buf byte-offset)))
  (define-inline (u32-set! buf n val)
    ;; N is an index in 32-bit units; scale it to a byte offset.
    (let ((byte-offset (* 4 n)))
      (bytevector-u32-native-set! buf byte-offset val)))
  (define-inline (s32-ref buf n)
    ;; Signed counterpart of u32-ref; N is an index in 32-bit units.
    (let ((byte-offset (* 4 n)))
      (bytevector-s32-native-ref buf byte-offset)))
  (define-inline (s32-set! buf n val)
    ;; Signed counterpart of u32-set!; N is an index in 32-bit units.
    (let ((byte-offset (* 4 n)))
      (bytevector-s32-native-set! buf byte-offset val)))
  244. ;;; A <meta> entry collects metadata for one procedure. Procedures are
  245. ;;; written as contiguous ranges of bytecode.
  246. ;;;
  (eval-when (expand)
    ;; Signal an error unless the value of ARG matches PATTERN (as in
    ;; `match').  KIND is a string naming what was expected; it is
    ;; appended to "expected " to form the error message.
    (define-syntax-rule (assert-match arg pattern kind)
      (let ((value arg))
        (when (not (match value
                     (pattern #t)
                     (_ #f)))
          (error (string-append "expected " kind) value)))))
  ;; Record holding the metadata of one procedure.  LABEL and LOW-PC are
  ;; fixed at construction; PROPERTIES, HIGH-PC and ARITIES have setters
  ;; and can be updated afterwards.
  252. (define-record-type <meta>
  253. (%make-meta label properties low-pc high-pc arities)
  254. meta?
  255. (label meta-label)
  256. (properties meta-properties set-meta-properties!)
  257. (low-pc meta-low-pc)
  258. (high-pc meta-high-pc set-meta-high-pc!)
  259. (arities meta-arities set-meta-arities!))
  (define (make-meta label properties low-pc)
    "Return a new <meta> for the procedure starting at @var{low-pc}.
  @var{label} must be an exact integer or a symbol, and @var{properties}
  an alist whose keys are symbols.  The high PC starts out as @code{#f}
  and the arity list starts out empty."
    ;; The label check accepts exact integers as well as symbols, so the
    ;; error text names both (it used to say just "symbol").
    (assert-match label (or (? exact-integer?) (? symbol?))
                  "exact integer or symbol")
    (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
    (%make-meta label properties low-pc #f '()))
  (define (meta-name meta)
    "Return the value stored under the @code{name} key of @var{meta}'s
  property alist, or @code{#f} if there is none."
    (let ((props (meta-properties meta)))
      (assq-ref props 'name)))
  266. ;; Metadata for one <lambda-case>.
  ;; Record describing one arity (one <lambda-case>).  All fields are set
  ;; by the constructor; only HIGH-PC and DEFINITIONS have setters.
  267. (define-record-type <arity>
  268. (make-arity req opt rest kw-indices allow-other-keys?
  269. low-pc high-pc definitions)
  270. arity?
  271. (req arity-req)
  272. (opt arity-opt)
  273. (rest arity-rest)
  274. (kw-indices arity-kw-indices)
  275. (allow-other-keys? arity-allow-other-keys?)
  276. (low-pc arity-low-pc)
  277. (high-pc arity-high-pc set-arity-high-pc!)
  278. (definitions arity-definitions set-arity-definitions!))
  ;; Size of one code block.  Defined as identifier syntax, so every use
  ;; of *block-size* expands to the literal 32.  fresh-block allocates a
  ;; u32vector of this many elements, and emit compares the block index
  ;; against it.
  279. (eval-when (expand)
  280. (define-syntax *block-size* (identifier-syntax 32)))
  281. ;;; An assembler collects all of the words emitted during assembly, and
  282. ;;; also maintains ancillary information such as the constant table, a
  283. ;;; relocation list, and so on.
  284. ;;;
  285. ;;; Bytecode consists of 32-bit units. We emit bytecode using native
  286. ;;; endianness. If we're targeting a foreign endianness, we byte-swap
  287. ;;; the bytevector as a whole instead of conditionalizing each access.
  288. ;;;
  289. (define-record-type <asm>
  290. (make-asm cur idx start prev written
  291. labels relocs
  292. word-size endianness
  293. constants inits
  294. shstrtab next-section-number
  295. meta sources
  296. dead-slot-maps)
  297. asm?
  298. ;; We write bytecode into what is logically a growable vector,
  299. ;; implemented as a list of blocks. asm-cur is the current block, and
  300. ;; asm-idx is the current index into that block, in 32-bit units.
  301. ;;
  302. (cur asm-cur set-asm-cur!)
  303. (idx asm-idx set-asm-idx!)
  304. ;; asm-start is an absolute position, indicating the offset of the
  305. ;; beginning of an instruction (in u32 units). It is updated after
  306. ;; writing all the words for one primitive instruction. It models the
  307. ;; position of the instruction pointer during execution, given that
  308. ;; the VM updates the IP only at the end of executing the instruction,
  309. ;; and is thus useful for computing offsets between two points in a
  310. ;; program.
  311. ;;
  312. (start asm-start set-asm-start!)
  313. ;; The list of previously written blocks.
  314. ;;
  315. (prev asm-prev set-asm-prev!)
  316. ;; The number of u32 words written in asm-prev, which is the same as
  317. ;; the offset of the current block.
  318. ;;
  319. (written asm-written set-asm-written!)
  320. ;; A hash table mapping symbol -> position, holding the labels defined
  321. ;; in this compilation unit (make-assembler starts it off empty).
  322. ;;
  323. (labels asm-labels set-asm-labels!)
  324. ;; A list of relocations needed by the program text. We use an
  325. ;; internal representation for relocations, and handle textual
  326. ;; relative relocations in the assembler. Other kinds of relocations
  327. ;; are later reified as linker relocations and resolved by the linker.
  328. ;;
  329. (relocs asm-relocs set-asm-relocs!)
  330. ;; Target information.
  331. ;;
  332. (word-size asm-word-size)
  333. (endianness asm-endianness)
  334. ;; The constant table, as a vhash of object -> label. All constants
  335. ;; get de-duplicated and written into separate sections -- either the
  336. ;; .rodata section, for read-only data, or .data, for constants that
  337. ;; need initialization at load-time (like symbols). Constants can
  338. ;; depend on other constants (e.g. a symbol depending on a stringbuf),
  339. ;; so order in this table is important.
  340. ;;
  341. (constants asm-constants set-asm-constants!)
  342. ;; A list of instructions needed to initialize the constants. Will
  343. ;; run in a thunk with 2 local variables.
  344. ;;
  345. (inits asm-inits set-asm-inits!)
  346. ;; The shstrtab, for section names.
  347. ;;
  348. (shstrtab asm-shstrtab set-asm-shstrtab!)
  349. ;; The section number for the next section to be written.
  350. ;;
  351. (next-section-number asm-next-section-number set-asm-next-section-number!)
  352. ;; A list of <meta>, corresponding to procedure metadata.
  353. ;;
  354. (meta asm-meta set-asm-meta!)
  355. ;; A list of (pos . source) pairs, indicating source information. POS
  356. ;; is relative to the beginning of the text section, and SOURCE is in
  357. ;; the same format that source-properties returns.
  358. ;;
  359. (sources asm-sources set-asm-sources!)
  360. ;; A list of (pos . dead-slot-map) pairs, indicating dead slot maps.
  361. ;; POS is relative to the beginning of the text section.
  362. ;; DEAD-SLOT-MAP is a bitfield of slots that are dead at call sites,
  363. ;; as an integer.
  364. ;;
  365. (dead-slot-maps asm-dead-slot-maps set-asm-dead-slot-maps!))
  ;; Allocate one empty code block: a u32vector with *block-size*
  ;; elements.
  366. (define-inline (fresh-block)
  367. (make-u32vector *block-size*))
  (define* (make-assembler #:key (word-size (target-word-size))
                           (endianness (target-endianness)))
    "Return a fresh, empty assembler.  @var{word-size} and
  @var{endianness} describe the target; when omitted they default to the
  values of the currently configured target."
    (let ((first-block (fresh-block))
          (label-table (make-hash-table))
          (section-names (make-string-table)))
      (make-asm first-block 0 0 '() 0      ; cur idx start prev written
                label-table '()            ; labels relocs
                word-size endianness
                vlist-null '()             ; constants inits
                section-names 1            ; shstrtab next-section-number
                '() '() '())))             ; meta sources dead-slot-maps
  (define (intern-section-name! asm string)
    "Intern @var{string} in the section name table (shstrtab) of
  @var{asm}, returning whatever @code{string-table-intern!} returns."
    (let ((table (asm-shstrtab asm)))
      (string-table-intern! table string)))
  (define-inline (asm-pos asm)
    ;; Absolute offset, in 32-bit units, of the next word to be written:
    ;; the words held in already-closed blocks plus the index into the
    ;; current block.
    (let ((closed-words (asm-written asm))
          (in-block (asm-idx asm)))
      (+ in-block closed-words)))
  (define (allocate-new-block asm)
    "Retire the current block of @var{asm} onto the list of previous
  blocks and make a fresh block current, so that the next word lands at
  index 0 of the new block."
    (let* ((empty-block (fresh-block))
           (full-block (asm-cur asm))
           (older-blocks (asm-prev asm))
           ;; Must be read before the index is reset below.
           (words-so-far (asm-pos asm)))
      (set-asm-prev! asm (cons full-block older-blocks))
      (set-asm-written! asm words-so-far)
      (set-asm-cur! asm empty-block)
      (set-asm-idx! asm 0)))
  (define-inline (emit asm u32)
    ;; Store U32 at the current index of the current block and advance
    ;; the index.  The caller must guarantee room for this word; if the
    ;; block is now full, a new block is started so there is always room
    ;; for the next word.
    (let ((next-idx (1+ (asm-idx asm))))
      (u32-set! (asm-cur asm) (asm-idx asm) u32)
      (set-asm-idx! asm next-idx)
      (when (= next-idx *block-size*)
        (allocate-new-block asm))))
  ;; An internal relocation is simply the list (TYPE LABEL BASE WORD).
  ;; Note that "position @var{start}" in the string below refers to the
  ;; BASE parameter; the callers visible in this file pass a start
  ;; position as BASE and (- pos start) as WORD.
  402. (define-inline (make-reloc type label base word)
  403. "Make an internal relocation of type @var{type} referencing symbol
  404. @var{label}, @var{word} words after position @var{start}. @var{type}
  405. may be x8-s24, indicating a 24-bit relative label reference that can be
  406. fixed up by the assembler, or s32, indicating a 32-bit relative
  407. reference that needs to be fixed up by the linker."
  408. (list type label base word))
  (define-inline (reset-asm-start! asm)
    ;; Called once all words of an instruction have been written: the
    ;; instruction start becomes the current write position.
    (let ((here (asm-pos asm)))
      (set-asm-start! asm here)))
  (define (record-label-reference asm label)
    "Push an x8-s24 relocation for @var{label} onto the relocation list
  of @var{asm}.  The relocation is based at the start of the current
  instruction and records how many words into the instruction the
  reference sits.  The assembler patches it up later."
    (let* ((insn-start (asm-start asm))
           (word-offset (- (asm-pos asm) insn-start)))
      (set-asm-relocs! asm
                       (cons (make-reloc 'x8-s24 label insn-start word-offset)
                             (asm-relocs asm)))))
  (define* (record-far-label-reference asm label #:optional (offset 0))
    "Push an s32 relocation for @var{label} onto the relocation list of
  @var{asm}.  The base is the start of the current instruction minus
  @var{offset}; the word count is measured from that adjusted base.  The
  linker patches it up later."
    (let* ((adjusted-start (- (asm-start asm) offset))
           (word-offset (- (asm-pos asm) adjusted-start)))
      (set-asm-relocs! asm
                       (cons (make-reloc 's32 label adjusted-start word-offset)
                             (asm-relocs asm)))))
  426. ;;;
  427. ;;; Primitive assemblers are defined by expanding `assembler' for each
  428. ;;; opcode in `(instruction-list)'.
  429. ;;;
  430. (eval-when (expand)
  (define (id-append ctx a b)
    ;; Return an identifier, in the lexical context of CTX, whose name is
    ;; the names of identifiers A and B joined together.
    (let ((prefix (syntax->datum a))
          (suffix (syntax->datum b)))
      (datum->syntax ctx (symbol-append prefix suffix))))
  ;; (assembler NAME OPCODE WORD0 WORD* ...) expands to a lambda that
  ;; emits one primitive instruction.  WORD0 and WORD* are symbols naming
  ;; the layout of each 32-bit word.  For every word, a helper returns
  ;; syntax of the form ((FORMAL ...) CODE ...): the formals that word
  ;; contributes to the lambda and the code that emits it.
  433. (define-syntax assembler
  434. (lambda (x)
  ;; Expand-time dispatch on a word-type symbol.  Each clause pairs a
  ;; type and its argument names with emitting code; the matching clause
  ;; yields that code with the argument names replaced by fresh
  ;; temporaries.  A type with no clause is an error.
  435. (define-syntax op-case
  436. (lambda (x)
  437. (syntax-case x ()
  438. ((_ asm name ((type arg ...) code ...) clause ...)
  439. #`(if (eq? name 'type)
  440. (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
  441. #'((arg ...)
  442. code ...))
  443. (op-case asm name clause ...)))
  444. ((_ asm name)
  445. #'(error "unmatched name" name)))))
  ;; Code for the first word, which always carries OPCODE in its low
  ;; bits.  Label-typed words record a relocation before the word is
  ;; emitted.
  446. (define (pack-first-word asm opcode type)
  447. (with-syntax ((opcode opcode))
  448. (op-case
  449. asm type
  450. ((U8_X24)
  451. (emit asm opcode))
  452. ((U8_U24 arg)
  453. (emit asm (pack-u8-u24 opcode arg)))
  454. ((U8_L24 label)
  455. (record-label-reference asm label)
  456. (emit asm opcode))
  457. ((U8_U8_I16 a imm)
  458. (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
  459. ((U8_U12_U12 a b)
  460. (emit asm (pack-u8-u12-u12 opcode a b)))
  461. ((U8_U8_U8_U8 a b c)
  462. (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))
  ;; Code for any word after the first.
  463. (define (pack-tail-word asm type)
  464. (op-case
  465. asm type
  466. ((U8_U24 a b)
  467. (emit asm (pack-u8-u24 a b)))
  468. ((U8_L24 a label)
  469. (record-label-reference asm label)
  470. (emit asm a))
  471. ((U32 a)
  472. (emit asm a))
  473. ((I32 imm)
  474. (let ((val (object-address imm)))
  ;; The address of IMM must fit in 32 bits.
  475. (unless (zero? (ash val -32))
  476. (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
  477. (emit asm val)))
  ;; A32 takes two words: the high 32 bits of the address of IMM, then
  ;; the low 32 bits.  Only allowed when the target word size is 8.
  478. ((A32 imm)
  479. (unless (= (asm-word-size asm) 8)
  480. (error "make-long-immediate unavailable for this target"))
  481. (emit asm (ash (object-address imm) -32))
  482. (emit asm (logand (object-address imm) (1- (ash 1 32)))))
  ;; B32 has no formals and emits nothing.
  483. ((B32))
  ;; N32, S32, L32 and LO32 emit a zero placeholder and record a far
  ;; (s32) label reference.
  484. ((N32 label)
  485. (record-far-label-reference asm label)
  486. (emit asm 0))
  487. ((S32 label)
  488. (record-far-label-reference asm label)
  489. (emit asm 0))
  490. ((L32 label)
  491. (record-far-label-reference asm label)
  492. (emit asm 0))
  493. ((LO32 label offset)
  494. (record-far-label-reference asm label
  495. (* offset (/ (asm-word-size asm) 4)))
  496. (emit asm 0))
  497. ((X8_U24 a)
  498. (emit asm (pack-u8-u24 0 a)))
  499. ((X8_L24 label)
  500. (record-label-reference asm label)
  501. (emit asm 0))
  502. ((B1_X7_L24 a label)
  503. (record-label-reference asm label)
  504. (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
  505. ((B1_U7_L24 a b label)
  506. (record-label-reference asm label)
  507. (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
  508. ((B1_X31 a)
  509. (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
  510. ((B1_X7_U24 a b)
  511. (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))
  512. (syntax-case x ()
  513. ((_ name opcode word0 word* ...)
  514. (with-syntax ((((formal0 ...)
  515. code0 ...)
  516. (pack-first-word #'asm
  517. (syntax->datum #'opcode)
  518. (syntax->datum #'word0)))
  519. ((((formal* ...)
  520. code* ...) ...)
  521. (map (lambda (word) (pack-tail-word #'asm word))
  522. (syntax->datum #'(word* ...)))))
  ;; The emitter: check the assembler argument, emit every word in
  ;; order, then mark the start of the next instruction.
  523. #'(lambda (asm formal0 ... formal* ... ...)
  524. (unless (asm? asm) (error "not an asm"))
  525. code0 ...
  526. code* ... ...
  527. (reset-asm-start! asm))))))))
  ;; Table from instruction name (a symbol) to its emitter procedure.
  ;; It is filled in with hashq-set! by the emitter definitions and
  ;; consulted with hashq-ref by emit-text.
  528. (define assemblers (make-hash-table))
  529. (eval-when (expand)
  ;; (define-assembler NAME OPCODE KIND ARG ...) defines emit-NAME as the
  ;; emitter built by `assembler' and also registers it in the
  ;; `assemblers' table under NAME.  KIND is accepted but not used here.
  530. (define-syntax define-assembler
  531. (lambda (x)
  532. (syntax-case x ()
  533. ((_ name opcode kind arg ...)
  534. (with-syntax ((emit (id-append #'name #'emit- #'name)))
  535. #'(define emit
  536. (let ((emit (assembler name opcode arg ...)))
  537. (hashq-set! assemblers 'name emit)
  538. emit)))))))
  ;; (visit-opcodes MACRO ARG ...) expands to a `begin' holding one
  ;; (MACRO ARG ... . INST) form for every element INST of
  ;; (instruction-list), which is consulted at expansion time.
  539. (define-syntax visit-opcodes
  540. (lambda (x)
  541. (syntax-case x ()
  542. ((visit-opcodes macro arg ...)
  543. (with-syntax (((inst ...)
  544. (map (lambda (x) (datum->syntax #'macro x))
  545. (instruction-list))))
  546. #'(begin
  547. (macro arg ... . inst)
  548. ...)))))))
  ;; Define and register an emit-NAME procedure for every instruction in
  ;; (instruction-list).
  549. (visit-opcodes define-assembler)
  550. (eval-when (expand)
  551. ;; Some operands are encoded using a restricted subset of the full
  552. ;; 24-bit local address space, in order to make the bytecode more
  553. ;; dense in the usual case that there are few live locals. Here we
  554. ;; define wrapper emitters that shuffle out-of-range operands into and
  555. ;; out of the reserved range of locals [253,255]. This range is
  556. ;; sufficient because these restricted operands are only present in
  557. ;; the first word of an instruction. Since 8 bits is the smallest
  558. ;; slot-addressing operand size, that means we can fit 3 operands in
  559. ;; the 24 bits of payload of the first word (the lower 8 bits being
  560. ;; taken by the opcode).
  561. ;;
  562. ;; The result are wrapper emitters with the same arity,
  563. ;; e.g. emit-cons* that wraps emit-cons. We expose these wrappers as
  564. ;; the public interface for emitting `cons' instructions. That way we
  565. ;; solve the problem fully and in just one place. The only manual
  566. ;; care that need be taken is in the exports list at the top of the
  567. ;; file -- to be sure that we export the wrapper and not the wrapped
  568. ;; emitter.
  ;; Build the syntax of a wrapper emitter for instruction NAME, or
  ;; return #f when the first word WORD0, combined with KIND, is not one
  ;; of the layouts handled in analyze-first-word.  KIND is `!' or `<-'.
  ;; In the `<-' clauses the first operand gets no move before the
  ;; instruction; it is moved out of slot 253 afterwards (see shuffle-up).
  ;; WORD* lists the layouts of the remaining words, whose operands are
  ;; passed through unchanged.
  569. (define (shuffling-assembler name kind word0 word*)
  ;; Returns syntax ((FORMAL ...) SHUFFLE) for the first word, where
  ;; SHUFFLE is a `values' expression producing the operands to pass to
  ;; the wrapped emitter.  An operand that does not fit its field is
  ;; first copied with emit-mov* into slot 253, 254 or 255 and replaced
  ;; by that slot number.
  570. (define (analyze-first-word)
  571. (define-syntax op-case
  572. (syntax-rules ()
  573. ((_ type ((%type %kind arg ...) values) clause ...)
  574. (if (and (eq? type '%type) (eq? kind '%kind))
  575. (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
  576. #'((arg ...) values))
  577. (op-case type clause ...)))
  578. ((_ type)
  579. #f)))
  580. (op-case
  581. word0
  582. ((U8_U8_I16 ! a imm)
  583. (values (if (< a (ash 1 8)) a (begin (emit-mov* asm 253 a) 253))
  584. imm))
  585. ((U8_U8_I16 <- a imm)
  586. (values (if (< a (ash 1 8)) a 253)
  587. imm))
  588. ((U8_U12_U12 ! a b)
  589. (values (if (< a (ash 1 12)) a (begin (emit-mov* asm 253 a) 253))
  590. (if (< b (ash 1 12)) b (begin (emit-mov* asm 254 b) 254))))
  591. ((U8_U12_U12 <- a b)
  592. (values (if (< a (ash 1 12)) a 253)
  593. (if (< b (ash 1 12)) b (begin (emit-mov* asm 254 b) 254))))
  594. ((U8_U8_U8_U8 ! a b c)
  595. (values (if (< a (ash 1 8)) a (begin (emit-mov* asm 253 a) 253))
  596. (if (< b (ash 1 8)) b (begin (emit-mov* asm 254 b) 254))
  597. (if (< c (ash 1 8)) c (begin (emit-mov* asm 255 c) 255))))
  598. ((U8_U8_U8_U8 <- a b c)
  599. (values (if (< a (ash 1 8)) a 253)
  600. (if (< b (ash 1 8)) b (begin (emit-mov* asm 254 b) 254))
  601. (if (< c (ash 1 8)) c (begin (emit-mov* asm 255 c) 255))))))
  ;; Fresh formals for one tail word; only the number of operands of the
  ;; layout matters.  An unknown layout is an error.
  602. (define (tail-formals type)
  603. (define-syntax op-case
  604. (syntax-rules ()
  605. ((op-case type (%type arg ...) clause ...)
  606. (if (eq? type '%type)
  607. (generate-temporaries #'(arg ...))
  608. (op-case type clause ...)))
  609. ((op-case type)
  610. (error "unmatched type" type))))
  611. (op-case type
  612. (U8_U24 a b)
  613. (U8_L24 a label)
  614. (U32 a)
  615. (I32 imm)
  616. (A32 imm)
  617. (B32)
  618. (N32 label)
  619. (S32 label)
  620. (L32 label)
  621. (LO32 label offset)
  622. (X8_U24 a)
  623. (X8_L24 label)
  624. (B1_X7_L24 a label)
  625. (B1_U7_L24 a b label)
  626. (B1_X31 a)
  627. (B1_X7_U24 a b)))
  ;; Code that runs after the wrapped instruction for `<-' kinds: when
  ;; DST did not fit its field, the instruction was given slot 253
  ;; instead, so copy slot 253 into the real DST.
  628. (define (shuffle-up dst)
  629. (define-syntax op-case
  630. (syntax-rules ()
  631. ((_ type ((%type ...) exp) clause ...)
  632. (if (memq type '(%type ...))
  633. #'exp
  634. (op-case type clause ...)))
  635. ((_ type)
  636. (error "unexpected type" type))))
  637. (with-syntax ((dst dst))
  638. (op-case
  639. word0
  640. ((U8_U8_I16 U8_U8_U8_U8)
  641. (unless (< dst (ash 1 8))
  642. (emit-mov* asm dst 253)))
  643. ((U8_U12_U12)
  644. (unless (< dst (ash 1 12))
  645. (emit-mov* asm dst 253))))))
  646. (and=>
  647. (analyze-first-word)
  648. (lambda (formals+shuffle)
  649. (with-syntax ((emit-name (id-append name #'emit- name))
  650. (((formal0 ...) shuffle) formals+shuffle)
  651. (((formal* ...) ...) (map tail-formals word*)))
  652. (with-syntax (((shuffle-up-dst ...)
  653. (if (eq? kind '<-)
  654. (syntax-case #'(formal0 ...) ()
  655. ((dst . _)
  656. (list (shuffle-up #'dst))))
  657. '())))
  ;; The wrapper has the same arity as the wrapped emitter.  The inner
  ;; lambda rebinds the first-word formals to the shuffled values; the
  ;; shuffle-up code sits outside it and so sees the caller's original
  ;; destination.
  658. #'(lambda (asm formal0 ... formal* ... ...)
  659. (call-with-values (lambda () shuffle)
  660. (lambda (formal0 ...)
  661. (emit-name asm formal0 ... formal* ... ...)))
  662. shuffle-up-dst ...))))))
  ;; (define-shuffling-assembler #:except (EXCEPT ...) NAME OPCODE KIND
  ;; WORD0 WORD* ...) defines emit-NAME* as the shuffling wrapper around
  ;; emit-NAME and stores it in the `assemblers' table under NAME,
  ;; replacing the plain emitter there.  It expands to an empty `begin'
  ;; when NAME is listed in EXCEPT or when shuffling-assembler returns #f
  ;; for this instruction.
  663. (define-syntax define-shuffling-assembler
  664. (lambda (stx)
  665. (syntax-case stx ()
  666. ((_ #:except (except ...) name opcode kind word0 word* ...)
  667. (cond
  668. ((or-map (lambda (op) (eq? (syntax->datum #'name) op))
  669. (map syntax->datum #'(except ...)))
  670. #'(begin))
  671. ((shuffling-assembler #'name (syntax->datum #'kind)
  672. (syntax->datum #'word0)
  673. (map syntax->datum #'(word* ...)))
  674. => (lambda (proc)
  ;; Build the identifier emit-NAME*: emit- + NAME, then + *.
  675. (with-syntax ((emit (id-append #'name
  676. (id-append #'name #'emit- #'name)
  677. #'*))
  678. (proc proc))
  679. #'(define emit
  680. (let ((emit proc))
  681. (hashq-set! assemblers 'name emit)
  682. emit)))))
  683. (else #'(begin))))))))
  ;; Generate the emit-NAME* wrappers for every instruction except
  ;; `receive' and `mov', whose wrappers are written by hand.
  684. (visit-opcodes define-shuffling-assembler #:except (receive mov))
  685. ;; Mov and receive are two special cases that can work without wrappers.
  686. ;; Indeed it is important that they do so.
  (define (emit-mov* asm dst src)
    ;; Use the short `mov' when both operands are below 2^12, and
    ;; `long-mov' otherwise.
    (let ((limit (ash 1 12)))
      (if (or (>= dst limit) (>= src limit))
          (emit-long-mov asm dst src)
          (emit-mov asm dst src))))
  (define (emit-receive* asm dst proc nlocals)
    ;; Use `receive' directly when both DST and PROC are below 2^12.
    ;; Otherwise emit the equivalent three-instruction sequence:
    ;; receive-values, a move from slot PROC+1 into DST, and
    ;; reset-frame.
    (let ((limit (ash 1 12)))
      (cond
       ((and (< dst limit) (< proc limit))
        (emit-receive asm dst proc nlocals))
       (else
        (emit-receive-values asm proc #t 1)
        (emit-mov* asm dst (1+ proc))
        (emit-reset-frame asm nlocals)))))
  (define (emit-text asm instructions)
    "Assemble each element of @var{instructions} with the assembler
  @var{asm}.  Every instruction is a list whose head names the
  instruction and whose tail holds its operands.  An instruction with no
  registered emitter signals a @code{bad-instruction} error.  May be
  called repeatedly before @code{link-assembly}."
    (define (assemble-one inst)
      (let ((emitter (hashq-ref assemblers (car inst))))
        (unless emitter
          (error 'bad-instruction inst))
        (apply emitter asm (cdr inst))))
    (for-each assemble-one instructions))
  709. ;;;
  710. ;;; The constant table records a topologically sorted set of literal
  711. ;;; constants used by a program. For example, a pair uses its car and
  712. ;;; cdr, a string uses its stringbuf, etc.
  713. ;;;
  714. ;;; Some things we want to add to the constant table are not actually
  715. ;;; Scheme objects: for example, stringbufs, cache cells for toplevel
  716. ;;; references, or cache cells for non-closure procedures. For these we
  717. ;;; define special record types and add instances of those record types
  718. ;;; to the table.
  719. ;;;
  (define-inline (immediate? x)
    ;; X counts as immediate when either of bits 1 and 2 (mask 6) of its
    ;; object address is set.
    (let ((masked (logand 6 (object-address x))))
      (not (= masked 0))))
  ;; Constant-table stand-in that wraps a string; intern-constant interns
  ;; one of these for every string constant and patches its label into
  ;; the string.
  723. (define-record-type <stringbuf>
  724. (make-stringbuf string)
  725. stringbuf?
  726. (string stringbuf-string))
  ;; Constant-table entry for a procedure that can be allocated
  ;; statically.  intern-constant patches the CODE field's value into
  ;; field 1 of the entry with static-patch!.
  727. (define-record-type <static-procedure>
  728. (make-static-procedure code)
  729. static-procedure?
  730. (code static-procedure-code))
  ;; Constant-table entry for the raw contents of a uniform vector: BYTES
  ;; is the bytevector made by uniform-array->bytevector, and
  ;; ELEMENT-SIZE is the unit width in bytes (1, 2, 4 or 8) chosen from
  ;; the array type in intern-constant.
  731. (define-record-type <uniform-vector-backing-store>
  732. (make-uniform-vector-backing-store bytes element-size)
  733. uniform-vector-backing-store?
  734. (bytes uniform-vector-backing-store-bytes)
  735. (element-size uniform-vector-backing-store-element-size))
  ;; Constant-table entry for a cache cell, identified by SCOPE and KEY.
  ;; It needs no initialization code in intern-constant.
  736. (define-record-type <cache-cell>
  737. (make-cache-cell scope key)
  738. cache-cell?
  739. (scope cache-cell-scope)
  740. (key cache-cell-key))
  (define (simple-vector? obj)
    ;; True when OBJ is a vector whose array shape is exactly one
    ;; dimension running from 0 to length-1.
    (if (vector? obj)
        (let ((last-index (- (vector-length obj) 1)))
          (equal? (array-shape obj) `((0 ,last-index))))
        #f))
  (define (simple-uniform-vector? obj)
    ;; True when OBJ is an array whose element type is named by a symbol
    ;; and whose shape is exactly one dimension running from 0 to
    ;; length-1.
    (cond
     ((not (array? obj)) #f)
     ((not (symbol? (array-type obj))) #f)
     (else
      (let ((last-index (1- (array-length obj))))
        (equal? (array-shape obj) `((0 ,last-index)))))))
  (define (statically-allocatable? x)
    "Return @code{#t} if the non-immediate constant @var{x} is a pair,
  string, stringbuf, static procedure or array, and so can be laid out
  statically; return @code{#f} if it would need some kind of runtime
  allocation."
    (cond
     ((pair? x) #t)
     ((string? x) #t)
     ((stringbuf? x) #t)
     ((static-procedure? x) #t)
     (else (array? x))))
  ;; Result: #f for an immediate OBJ, the existing label for an object
  ;; already in the table, otherwise a fresh gensym label.  In the last
  ;; case the object's components are interned first, which is how
  ;; dependencies end up in the table before the objects that use them.
  753. (define (intern-constant asm obj)
  754. "Add an object to the constant table, and return a label that can be
  755. used to reference it. If the object is already present in the constant
  756. table, its existing label is used directly."
  757. (define (recur obj)
  758. (intern-constant asm obj))
  ;; Init instructions that store OBJ into field N of DST.  An immediate
  ;; OBJ (no label) yields none.  A statically allocatable OBJ is patched
  ;; in with static-patch!; anything else is loaded into local 1 with
  ;; static-ref and then stored with static-set!.
  759. (define (field dst n obj)
  760. (let ((src (recur obj)))
  761. (if src
  762. (if (statically-allocatable? obj)
  763. `((static-patch! ,dst ,n ,src))
  764. `((static-ref 1 ,src)
  765. (static-set! 1 ,dst ,n)))
  766. '())))
  ;; Return the list of init instructions for OBJ, whose label is LABEL,
  ;; interning OBJ's components along the way.
  767. (define (intern obj label)
  768. (cond
  769. ((pair? obj)
  770. (append (field label 0 (car obj))
  771. (field label 1 (cdr obj))))
  772. ((simple-vector? obj)
  ;; Element I is stored in field I+1.
  773. (let lp ((i 0) (inits '()))
  774. (if (< i (vector-length obj))
  775. (lp (1+ i)
  776. (append-reverse (field label (1+ i) (vector-ref obj i))
  777. inits))
  778. (reverse inits))))
  779. ((stringbuf? obj) '())
  780. ((static-procedure? obj)
  781. `((static-patch! ,label 1 ,(static-procedure-code obj))))
  782. ((cache-cell? obj) '())
  ;; Symbols, keywords and numbers are rebuilt in local 1 from an
  ;; interned constant (the name string, the symbol, or the printed
  ;; representation) and the result is stored in field 0 of LABEL.
  783. ((symbol? obj)
  784. `((make-non-immediate 1 ,(recur (symbol->string obj)))
  785. (string->symbol 1 1)
  786. (static-set! 1 ,label 0)))
  787. ((string? obj)
  788. `((static-patch! ,label 1 ,(recur (make-stringbuf obj)))))
  789. ((keyword? obj)
  790. `((static-ref 1 ,(recur (keyword->symbol obj)))
  791. (symbol->keyword 1 1)
  792. (static-set! 1 ,label 0)))
  793. ((number? obj)
  794. `((make-non-immediate 1 ,(recur (number->string obj)))
  795. (string->number 1 1)
  796. (static-set! 1 ,label 0)))
  797. ((uniform-vector-backing-store? obj) '())
  798. ((simple-uniform-vector? obj)
  799. (let ((width (case (array-type obj)
  800. ((vu8 u8 s8) 1)
  801. ((u16 s16) 2)
  802. ;; Bitvectors are addressed in 32-bit units.
  803. ;; Although a complex number is 8 or 16 bytes wide,
  804. ;; it should be byteswapped in 4 or 8 byte units.
  805. ((u32 s32 f32 c32 b) 4)
  806. ((u64 s64 f64 c64) 8)
  807. (else
  808. (error "unhandled array type" obj)))))
  809. `((static-patch! ,label 2
  810. ,(recur (make-uniform-vector-backing-store
  811. (uniform-array->bytevector obj)
  812. width))))))
  813. ((array? obj)
  814. `((static-patch! ,label 1 ,(recur (shared-array-root obj)))))
  815. (else
  816. (error "don't know how to intern" obj))))
  817. (cond
  818. ((immediate? obj) #f)
  819. ((vhash-assoc obj (asm-constants asm)) => cdr)
  820. (else
  821. ;; Note that calling intern may mutate asm-constants and asm-inits.
  ;; For that reason the table and the init list are re-read after
  ;; `intern' returns, not before.  asm-inits is kept in reverse order:
  ;; the new inits are pushed on with append-reverse.
  822. (let* ((label (gensym "constant"))
  823. (inits (intern obj label)))
  824. (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
  825. (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
  826. label))))
  (define (intern-non-immediate asm obj)
    "Intern @var{obj}, which must not be an immediate, into the constant
table and return its label.  An error is signalled for immediates."
    (if (immediate? obj)
        (error "expected a non-immediate" obj))
    (intern-constant asm obj))
  (define (intern-cache-cell asm scope key)
    "Return the label of the cache cell identified by @var{scope} and
@var{key}, interning a new cell into the constant table unless one with
the same scope and key is already there."
    (let ((cell (make-cache-cell scope key)))
      (intern-constant asm cell)))
  838. ;; Return the label of the cell that holds the module for a scope.
  (define (intern-module-cache-cell asm scope)
    "Return the label of the cache cell holding the module for
@var{scope}.  This is the cache cell of @var{scope} whose key is
@code{#t}."
    (intern-constant asm (make-cache-cell scope #t)))
  842. ;;;
  843. ;;; Macro assemblers bridge the gap between primitive instructions and
  844. ;;; some higher-level operations.
  845. ;;;
  (eval-when (expand)
    ;; (define-macro-assembler (NAME FORMAL ...) FORM FORM* ...)
    ;;
    ;; Defines and exports a procedure called emit-NAME taking FORMAL ...
    ;; and running FORM FORM* ..., and registers that procedure in the
    ;; `assemblers' table under the symbol NAME.
    (define-syntax define-macro-assembler
      (lambda (stx)
        (syntax-case stx ()
          ((_ (name formal ...) form form* ...)
           (with-syntax ((emit (id-append #'name #'emit- #'name)))
             #'(begin
                 (define emit
                   (let ((emit (lambda (formal ...) form form* ...)))
                     (hashq-set! assemblers 'name emit)
                     emit))
                 (export emit))))))))
  (define-macro-assembler (load-constant asm dst obj)
    ;; Load the constant OBJ into slot DST.  Immediates are emitted
    ;; inline, using the shortest encoding that fits both DST and the
    ;; bits of the object.  Other objects are interned and then loaded
    ;; by address (if statically allocatable) or read from their slot
    ;; in the constant table.
    (if (immediate? obj)
        (let ((bits (object-address obj)))
          (if (and (< dst 256) (zero? (ash bits -16)))
              (emit-make-short-immediate asm dst obj)
              (if (zero? (ash bits -32))
                  (emit-make-long-immediate asm dst obj)
                  (emit-make-long-long-immediate asm dst obj))))
        (if (statically-allocatable? obj)
            (emit-make-non-immediate asm dst (intern-non-immediate asm obj))
            (emit-static-ref asm dst (intern-non-immediate asm obj)))))
  (define-macro-assembler (load-static-procedure asm dst label)
    ;; Intern a static procedure object for the code at LABEL and load
    ;; its address into slot DST.
    (let* ((proc (make-static-procedure label))
           (proc-label (intern-constant asm proc)))
      (emit-make-non-immediate asm dst proc-label)))
  ;; (define-tc7-macro-assembler NAME TC7) defines the macro-assembler
  ;; NAME, taking (asm slot invert? label), as a br-if-tc7 instruction
  ;; specialized to the type code TC7.
  (define-syntax define-tc7-macro-assembler
    (syntax-rules ()
      ((_ name tc7)
       (define-macro-assembler (name asm slot invert? label)
         (emit-br-if-tc7 asm slot invert? tc7 label)))))
  879. ;; Keep in sync with tags.h. Part of Guile's ABI. Currently unused
  880. ;; macro assemblers are commented out. See also
  881. ;; *branching-primcall-arities* in (language cps primitives), the set of
  882. ;; macro-instructions in assembly.scm, and
  883. ;; disassembler.scm:code-annotation.
  884. ;;
  885. ;; FIXME: Define all tc7 values in Scheme in one place, derived from
  886. ;; tags.h.
  ;; One type-check branch per tc7 value.  Lines that are commented out
  ;; are macro assemblers that are currently unused.
  (define-tc7-macro-assembler br-if-symbol      5)
  (define-tc7-macro-assembler br-if-variable    7)
  (define-tc7-macro-assembler br-if-vector     13)
  ;(define-tc7-macro-assembler br-if-weak-vector 13)
  (define-tc7-macro-assembler br-if-string     21)
  ;(define-tc7-macro-assembler br-if-heap-number 23)
  ;(define-tc7-macro-assembler br-if-stringbuf 39)
  (define-tc7-macro-assembler br-if-bytevector 77)
  ;(define-tc7-macro-assembler br-if-pointer 31)
  ;(define-tc7-macro-assembler br-if-hashtable 29)
  ;(define-tc7-macro-assembler br-if-fluid 37)
  ;(define-tc7-macro-assembler br-if-dynamic-state 45)
  ;(define-tc7-macro-assembler br-if-frame 47)
  (define-tc7-macro-assembler br-if-keyword    53)
  ;(define-tc7-macro-assembler br-if-vm 55)
  ;(define-tc7-macro-assembler br-if-vm-cont 71)
  ;(define-tc7-macro-assembler br-if-rtl-program 69)
  ;(define-tc7-macro-assembler br-if-weak-set 85)
  ;(define-tc7-macro-assembler br-if-weak-table 87)
  ;(define-tc7-macro-assembler br-if-array 93)
  (define-tc7-macro-assembler br-if-bitvector  95)
  ;(define-tc7-macro-assembler br-if-port 125)
  ;(define-tc7-macro-assembler br-if-smob 127)
  (define-macro-assembler (begin-program asm label properties)
    ;; Bind LABEL to the current position, then push a fresh metadata
    ;; record for the program that starts here onto (asm-meta asm).
    (emit-label asm label)
    (set-asm-meta! asm
                   (cons (make-meta label properties (asm-start asm))
                         (asm-meta asm))))
  (define-macro-assembler (end-program asm)
    ;; Close the most recently begun program: record the current
    ;; position as its high PC, and put its arities (which were
    ;; accumulated by consing) back into the order they were added.
    (let ((current (car (asm-meta asm))))
      (set-meta-high-pc! current (asm-start asm))
      (let ((arities-in-order (reverse (meta-arities current))))
        (set-meta-arities! current arities-in-order))))
  (define-macro-assembler (begin-standard-arity asm req nlocals alternate)
    ;; An arity with only required arguments: no optionals and no rest
    ;; argument.
    (let ((no-optionals '())
          (no-rest #f))
      (emit-begin-opt-arity asm req no-optionals no-rest nlocals alternate)))
  (define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
    ;; An arity without keyword arguments: an empty keyword index list,
    ;; and other keys are not allowed.
    (let ((no-keywords '())
          (allow-other-keys? #f))
      (emit-begin-kw-arity asm req opt rest no-keywords allow-other-keys?
                           nlocals alternate)))
  (define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
                                          allow-other-keys? nlocals alternate)
    ;; Begin a new arity for the program being assembled: validate the
    ;; arguments, record the arity in the program's metadata, and emit
    ;; the prelude that matches the kind of arguments accepted.
    ;;
    ;; REQ and OPT are lists of symbols, REST is #f or a symbol,
    ;; KW-INDICES is an alist of keyword -> integer, NLOCALS is an
    ;; integer, and ALTERNATE is #f or the label (a symbol or an exact
    ;; integer) passed to the prelude as its alternate branch target.
    (assert-match req ((? symbol?) ...) "list of symbols")
    (assert-match opt ((? symbol?) ...) "list of symbols")
    (assert-match rest (or #f (? symbol?)) "#f or symbol")
    (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
                  "alist of keyword -> integer")
    (assert-match allow-other-keys? (? boolean?) "boolean")
    (assert-match nlocals (? integer?) "integer")
    ;; The description lists everything the pattern accepts, so that a
    ;; failure message is not misleading.
    (assert-match alternate (or #f (? exact-integer?) (? symbol?))
                  "#f, exact integer, or symbol")
    (let* ((meta (car (asm-meta asm)))
           (arity (make-arity req opt rest kw-indices allow-other-keys?
                              (asm-start asm) #f '()))
           ;; The procedure itself is in slot 0, in the standard calling
           ;; convention.  For procedure prologues, nreq includes the
           ;; procedure, so here we add 1.
           (nreq (1+ (length req)))
           (nopt (length opt))
           (rest? (->bool rest)))
      (set-meta-arities! meta (cons arity (meta-arities meta)))
      (cond
       ;; Keyword arguments need the most general prelude.
       ((or allow-other-keys? (pair? kw-indices))
        (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
                         nlocals alternate))
       ((or rest? (pair? opt))
        (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
       (else
        (emit-standard-prelude asm nreq nlocals alternate)))))
  (define-macro-assembler (end-arity asm)
    ;; Close the most recently begun arity of the current program: put
    ;; its definitions back into the order they were added, and record
    ;; the current position as its high PC.
    (let* ((program (car (asm-meta asm)))
           (current (car (meta-arities program))))
      (set-arity-definitions! current (reverse (arity-definitions current)))
      (set-arity-high-pc! current (asm-start asm))))
  954. ;; As noted above, we reserve locals 253 through 255 for shuffling large
  955. ;; operands. However the calling convention has all arguments passed in
  956. ;; a contiguous block. This helper, called after the clause has been
  957. ;; chosen and the keyword/optional/rest arguments have been processed,
  958. ;; shuffles up arguments from slot 253 and higher into their final
  959. ;; allocations.
  960. ;;
  (define (shuffle-up-args asm nargs)
    ;; Move every argument in slots 253 through NARGS-1 up by three
    ;; slots, working from the highest slot downwards so that no source
    ;; is overwritten before it has been moved.  Does nothing when
    ;; NARGS is 253 or less.
    (let lp ((slot (1- nargs)))
      (when (>= slot 253)
        (emit-mov asm (+ slot 3) slot)
        (lp (1- slot)))))
  (define-macro-assembler (standard-prelude asm nreq nlocals alternate)
    ;; Prelude for an arity with exactly NREQ arguments.  With an
    ;; ALTERNATE, a mismatch branches there; otherwise the argument
    ;; count is asserted, using the combined assert-and-allocate
    ;; instruction when both of its operands are below 2^12.
    (define (below-limit? n)
      (< n (ash 1 12)))
    (if alternate
        (begin
          (emit-br-if-nargs-ne asm nreq alternate)
          (emit-alloc-frame asm nlocals))
        (let ((nextra (- nlocals nreq)))
          (if (and (below-limit? nreq) (below-limit? nextra))
              (emit-assert-nargs-ee/locals asm nreq nextra)
              (begin
                (emit-assert-nargs-ee asm nreq)
                (emit-alloc-frame asm nlocals)))))
    (shuffle-up-args asm nreq))
  (define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
    ;; Prelude for an arity with optional and/or rest arguments.
    (let ((npositional (+ nreq nopt)))
      ;; Lower bound: at least NREQ arguments.
      (cond
       (alternate (emit-br-if-nargs-lt asm nreq alternate))
       (else (emit-assert-nargs-ge asm nreq)))
      ;; Upper bound: either collect the surplus into a rest argument,
      ;; or check that there are at most NREQ + NOPT arguments.
      (cond
       (rest? (emit-bind-rest asm npositional))
       (alternate (emit-br-if-nargs-gt asm npositional alternate))
       (else (emit-assert-nargs-le asm npositional)))
      (emit-alloc-frame asm nlocals)
      (shuffle-up-args asm (if rest? (1+ npositional) npositional))))
  (define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                      allow-other-keys? nlocals alternate)
    ;; Prelude for an arity that takes keyword arguments.
    (let ((npositional (+ nreq nopt)))
      ;; One more than KW's slot index, or SO-FAR if that is larger.
      (define (slots-through kw so-far)
        (match kw
          (((? keyword?) . idx)
           (max (1+ idx) so-far))))
      (cond
       (alternate
        (emit-br-if-nargs-lt asm nreq alternate)
        (unless rest?
          (emit-br-if-npos-gt asm nreq npositional alternate)))
       (else
        (emit-assert-nargs-ge asm nreq)))
      ;; NTOTAL covers the positional arguments and every keyword slot.
      (let ((ntotal (fold slots-through npositional kw-indices)))
        ;; FIXME: port 581f410f
        (emit-bind-kwargs asm nreq
                          (pack-flags allow-other-keys? rest?)
                          npositional
                          ntotal
                          (intern-constant asm kw-indices))
        (emit-alloc-frame asm nlocals)
        (shuffle-up-args asm ntotal))))
  (define-macro-assembler (label asm sym)
    ;; Bind SYM to the current position in the label table of ASM.
    (let ((here (asm-start asm)))
      (hashq-set! (asm-labels asm) sym here)))
  (define-macro-assembler (source asm source)
    ;; Associate SOURCE with the current position by pushing a
    ;; (position . SOURCE) entry onto (asm-sources asm).
    (let ((entry (cons (asm-start asm) source)))
      (set-asm-sources! asm (cons entry (asm-sources asm)))))
  (define-macro-assembler (definition asm name slot)
    ;; Record on the current arity that NAME is defined in SLOT from
    ;; this point on.  The position is stored relative to the low PC of
    ;; the arity and multiplied by 4 (presumably a byte offset, at 4
    ;; bytes per instruction word).
    (let* ((arity (car (meta-arities (car (asm-meta asm)))))
           (words-into-arity (- (asm-start asm) (arity-low-pc arity)))
           (def (vector name slot (* words-into-arity 4))))
      (set-arity-definitions! arity (cons def (arity-definitions arity)))))
  (define-macro-assembler (cache-current-module! asm module scope)
    ;; Store the value in slot MODULE into the module cache cell of
    ;; SCOPE.
    (emit-static-set! asm module (intern-module-cache-cell asm scope) 0))
  (define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
    ;; Emit a toplevel-box instruction for SYM, interning the symbol,
    ;; the module cache cell of SCOPE, and the cache cell keyed by
    ;; SCOPE and SYM.
    (let ((name (intern-non-immediate asm sym))
          (module-cell (intern-module-cache-cell asm scope))
          (box-cell (intern-cache-cell asm scope sym)))
      (emit-toplevel-box asm dst box-cell module-cell name bound?)))
  (define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
    ;; Emit a module-box instruction for SYM in the module named
    ;; MODULE-NAME.  The pair (PUBLIC? . MODULE-NAME) is both interned
    ;; as a constant and used as the scope of the cache cell.
    (let* ((name (intern-non-immediate asm sym))
           (scope (cons public? module-name))
           (scope-label (intern-constant asm scope))
           (box-cell (intern-cache-cell asm scope sym)))
      (emit-module-box asm dst box-cell scope-label name bound?)))
  (define-macro-assembler (dead-slot-map asm proc-slot dead-slot-map)
    ;; Record a (position proc-slot . map) entry for the current
    ;; position, unless the map is zero.
    (when (not (zero? dead-slot-map))
      (let ((entry (cons* (asm-start asm) proc-slot dead-slot-map)))
        (set-asm-dead-slot-maps! asm
                                 (cons entry (asm-dead-slot-maps asm))))))
  1041. ;;;
  1042. ;;; Helper for linking objects.
  1043. ;;;
  (define (make-object asm name bv relocs labels . kwargs)
    "Build a linker object for the section @var{name} with contents
@var{bv}.  The name is interned in the shstrtab, the section size is
taken from @var{bv}, a fresh section index is allocated, and a linker
symbol for the start of the section is added in front of @var{labels}.
@var{kwargs} are passed on to @code{make-elf-section}."
    (let ((name-idx (intern-section-name! asm (symbol->string name)))
          (index (asm-next-section-number asm)))
      ;; Claim INDEX for this section.
      (set-asm-next-section-number! asm (1+ index))
      (let ((section (apply make-elf-section
                            #:index index
                            #:name name-idx
                            #:size (bytevector-length bv)
                            kwargs))
            (symbols (cons (make-linker-symbol name 0) labels)))
        (make-linker-object section bv relocs symbols))))
  1058. ;;;
  1059. ;;; Linking the constant table. This code is somewhat intertwingled
  1060. ;;; with the intern-constant code above, as that procedure also
  1061. ;;; residualizes instructions to initialize constants at load time.
  1062. ;;;
  (define (write-immediate asm buf pos x)
    ;; Write the object address of X into BUF at byte offset POS, as
    ;; one word in the word size and endianness of ASM.
    (let ((store! (case (asm-word-size asm)
                    ((4) bytevector-u32-set!)
                    ((8) bytevector-u64-set!)
                    (else (error "bad word size" asm)))))
      (store! buf pos (object-address x) (asm-endianness asm))))
  (define (emit-init-constants asm)
    "Emit a procedure that runs the pending constant initializers of
@var{asm} and return its label, or return @code{#f} if there are no
initializers."
    (let ((inits (asm-inits asm)))
      (if (null? inits)
          #f
          (let ((label (gensym "init-constants")))
            ;; The initializers were accumulated in reverse order.
            (emit-text asm
                       (append `((begin-program ,label ())
                                 (assert-nargs-ee/locals 1 1))
                               (reverse inits)
                               `((load-constant 1 ,*unspecified*)
                                 (return 1)
                                 (end-program))))
            label))))
  (define (link-data asm data name)
    "Link the static data for a program into the @var{name} section (which
should be .data or .rodata), and return the resulting linker object, or
@code{#f} if @var{data} is empty.  @var{data} should be a vhash mapping
objects to labels."
    ;; Round ADDRESS up to the next multiple of ALIGNMENT.
    (define (align address alignment)
      (+ address
         (modulo (- alignment (modulo address alignment)) alignment)))

    (define tc7-vector 13)
    (define stringbuf-shared-flag #x100)
    (define stringbuf-wide-flag #x400)
    (define tc7-stringbuf 39)
    (define tc7-narrow-stringbuf
      (+ tc7-stringbuf stringbuf-shared-flag))
    (define tc7-wide-stringbuf
      (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
    (define tc7-ro-string (+ 21 #x200))
    (define tc7-program 69)
    (define tc7-bytevector 77)
    (define tc7-bitvector 95)
    (define tc7-array 93)

    (let ((word-size (asm-word-size asm))
          (endianness (asm-endianness asm)))
      ;; Number of bytes that X occupies in the section.
      (define (byte-length x)
        (cond
         ((stringbuf? x)
          ;; Two header words, then the characters and a terminating
          ;; zero character.
          (let ((x (stringbuf-string x)))
            (+ (* 2 word-size)
               (case (string-bytes-per-char x)
                 ((1) (1+ (string-length x)))
                 ((4) (* (1+ (string-length x)) 4))
                 (else (error "bad string bytes per char" x))))))
         ((static-procedure? x)
          (* 2 word-size))
         ((string? x)
          (* 4 word-size))
         ((pair? x)
          (* 2 word-size))
         ((simple-vector? x)
          (* (1+ (vector-length x)) word-size))
         ((simple-uniform-vector? x)
          (* 4 word-size))
         ((uniform-vector-backing-store? x)
          (bytevector-length (uniform-vector-backing-store-bytes x)))
         ((array? x)
          (* word-size (+ 3 (* 3 (array-rank x)))))
         (else
          word-size)))

      ;; Store VAL into BUF at POS as one unsigned word of the target.
      (define (write-uword! buf pos val)
        (case word-size
          ((4) (bytevector-u32-set! buf pos val endianness))
          ((8) (bytevector-u64-set! buf pos val endianness))
          (else (error "bad word size" asm))))

      ;; Store VAL into BUF at POS as one signed word of the target.
      (define (write-sword! buf pos val)
        (case word-size
          ((4) (bytevector-s32-set! buf pos val endianness))
          ((8) (bytevector-s64-set! buf pos val endianness))
          (else (error "bad word size" asm))))

      (define (write-constant-reference buf pos x)
        ;; The asm-inits will fix up any reference to a non-immediate.
        (write-immediate asm buf pos (if (immediate? x) x #f)))

      ;; Write the static representation of OBJ into BUF at POS.
      (define (write buf pos obj)
        (cond
         ((stringbuf? obj)
          (let* ((x (stringbuf-string obj))
                 (len (string-length x))
                 (tag (if (= (string-bytes-per-char x) 1)
                          tc7-narrow-stringbuf
                          tc7-wide-stringbuf)))
            (write-uword! buf pos tag)
            (write-uword! buf (+ pos word-size) len)
            ;; The characters follow the two header words, and are
            ;; terminated by a zero character.
            (let ((pos (+ pos (* word-size 2))))
              (case (string-bytes-per-char x)
                ((1)
                 (let lp ((i 0))
                   (if (< i len)
                       (let ((u8 (char->integer (string-ref x i))))
                         (bytevector-u8-set! buf (+ pos i) u8)
                         (lp (1+ i)))
                       (bytevector-u8-set! buf (+ pos i) 0))))
                ((4)
                 (let lp ((i 0))
                   (if (< i len)
                       (let ((u32 (char->integer (string-ref x i))))
                         (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
                         (lp (1+ i)))
                       (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
                (else (error "bad string bytes per char" x))))))

         ((static-procedure? obj)
          (write-uword! buf pos tc7-program)
          (write-uword! buf (+ pos word-size) 0))

         ((cache-cell? obj)
          (write-immediate asm buf pos #f))

         ((string? obj)
          (write-uword! buf pos tc7-ro-string)
          (write-immediate asm buf (+ pos word-size) #f) ; stringbuf
          (write-uword! buf (+ pos (* 2 word-size)) 0)
          (write-uword! buf (+ pos (* 3 word-size)) (string-length obj)))

         ((pair? obj)
          (write-constant-reference buf pos (car obj))
          (write-constant-reference buf (+ pos word-size) (cdr obj)))

         ((simple-vector? obj)
          (let* ((len (vector-length obj))
                 (tag (logior tc7-vector (ash len 8))))
            (write-uword! buf pos tag)
            (let lp ((i 0))
              (when (< i len)
                (let ((pos (+ pos word-size (* i word-size)))
                      (elt (vector-ref obj i)))
                  (write-constant-reference buf pos elt)
                  (lp (1+ i)))))))

         ((symbol? obj)
          (write-immediate asm buf pos #f))

         ((keyword? obj)
          (write-immediate asm buf pos #f))

         ((number? obj)
          (write-immediate asm buf pos #f))

         ((simple-uniform-vector? obj)
          (let* ((tag (if (bitvector? obj)
                          tc7-bitvector
                          (let ((type-code (array-type-code obj)))
                            (logior tc7-bytevector (ash type-code 7)))))
                 (len (if (bitvector? obj)
                          (bitvector-length obj)
                          (bytevector-length obj))))
            (write-uword! buf pos tag)
            (write-uword! buf (+ pos word-size) len)              ; length
            (write-uword! buf (+ pos (* 2 word-size)) 0)          ; pointer
            (write-immediate asm buf (+ pos (* 3 word-size)) #f))) ; owner

         ((uniform-vector-backing-store? obj)
          (let ((bv (uniform-vector-backing-store-bytes obj)))
            (bytevector-copy! bv 0 buf pos (bytevector-length bv))
            (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
                        (eq? endianness (native-endianness)))
              ;; Need to swap units of element-size bytes
              (error "FIXME: Implement byte order swap"))))

         ((array? obj)
          ;; array tag + rank + contp flag: see libguile/arrays.h .
          (let ((tag (logior tc7-array (ash (array-rank obj) 17) (ash 1 16))))
            (write-uword! buf pos tag)
            (write-immediate asm buf (+ pos word-size) #f) ; root vector (fixed later)
            (write-uword! buf (+ pos (* word-size 2)) 0)   ; base
            ;; One (lower bound, upper bound, increment) triple of
            ;; signed words per dimension.
            (let lp ((pos (+ pos (* word-size 3)))
                     (bounds (array-shape obj))
                     (incs (shared-array-increments obj)))
              (when (pair? bounds)
                (write-sword! buf pos (first (first bounds)))
                (write-sword! buf (+ pos word-size) (second (first bounds)))
                (write-sword! buf (+ pos (* word-size 2)) (first incs))
                (lp (+ pos (* 3 word-size)) (cdr bounds) (cdr incs))))))

         (else
          (error "unrecognized object" obj))))

      (cond
       ((vlist-null? data) #f)
       (else
        ;; Every object starts at an 8-byte aligned offset.
        (let* ((byte-len (vhash-fold (lambda (k v len)
                                       (+ (byte-length k) (align len 8)))
                                     0 data))
               (buf (make-bytevector byte-len 0)))
          (let lp ((i 0) (pos 0) (symbols '()))
            (if (< i (vlist-length data))
                (let* ((pair (vlist-ref data i))
                       (obj (car pair))
                       (obj-label (cdr pair)))
                  (write buf pos obj)
                  (lp (1+ i)
                      (align (+ (byte-length obj) pos) 8)
                      (cons (make-linker-symbol obj-label pos) symbols)))
                (make-object asm name buf '() symbols
                             #:flags (match name
                                       ('.data (logior SHF_ALLOC SHF_WRITE))
                                       ('.rodata SHF_ALLOC))))))))))
  (define (link-constants asm)
    "Link the sections that hold the constants needed by the program text
emitted using @var{asm}.

Three values are returned: an object for the .rodata section, an object
for the .data section, and the label of an initialization procedure.
Any of them may be @code{#f}."
    ;; Whether X is placed in .rodata rather than .data: stringbufs,
    ;; uniform vector backing stores, and pairs and simple vectors all
    ;; of whose fields are immediates.
    (define (shareable? x)
      (cond
       ((stringbuf? x) #t)
       ((pair? x)
        (and (immediate? (car x)) (immediate? (cdr x))))
       ((simple-vector? x)
        (let check ((i (1- (vector-length x))))
          (or (negative? i)
              (and (immediate? (vector-ref x i))
                   (check (1- i))))))
       ((uniform-vector-backing-store? x) #t)
       (else #f)))
    (let* ((constants (asm-constants asm))
           (count (vlist-length constants)))
      ;; Partition the constants, in table order, into RO and RW.
      (let lp ((i 0) (ro vlist-null) (rw vlist-null))
        (cond
         ((< i count)
          (match (vlist-ref constants i)
            ((obj . label)
             (if (shareable? obj)
                 (lp (1+ i) (vhash-consq obj label ro) rw)
                 (lp (1+ i) ro (vhash-consq obj label rw))))))
         (else
          (values (link-data asm ro '.rodata)
                  (link-data asm rw '.data)
                  (emit-init-constants asm)))))))
  1321. ;;;
  1322. ;;; Linking program text.
  1323. ;;;
  (define (process-relocs buf relocs labels)
    "Resolve in @var{buf} every x8-s24 relocation, and every s32
relocation whose label is bound in @var{labels}.  Return a list of
linker relocations for the s32 relocations whose label is not bound
there."
    ;; Handle one relocation.  PENDING is the list of linker relocations
    ;; collected so far; the possibly extended list is returned.
    (define (process-one reloc pending)
      (match reloc
        ((type label base word)
         (let ((target (hashq-ref labels label))
               (dst (+ base word)))
           (match type
             ('s32
              (cond
               (target
                (s32-set! buf dst (- target base))
                pending)
               (else
                (cons (make-linker-reloc 'rel32/4 (* dst 4) word label)
                      pending))))
             ('x8-s24
              (unless target
                (error "unbound near relocation" reloc))
              ;; Keep the low 8 bits of the word and replace the rest
              ;; with the relative offset.
              (let ((low-byte (logand (u32-ref buf dst) #xff)))
                (u32-set! buf dst (pack-u8-s24 low-byte (- target base)))
                pending))
             (_
              (error "bad relocation kind" reloc)))))))
    (fold process-one '() relocs))
  (define (process-labels labels)
    "Return a list with one linker symbol for each entry of the
label-offset map @var{labels}.  The offsets in the map are expected to
be in words; they are multiplied by 4 for the linker symbols."
    (define (label->linker-symbol label word-offset)
      (make-linker-symbol label (* word-offset 4)))
    (hash-map->list label->linker-symbol labels))
  (define (swap-bytes! buf)
    "Reverse, in place, the byte order of every 32-bit unit of the text
buffer @var{buf}.  An error is signalled if the length of @var{buf} is
not a multiple of 4."
    (let ((byte-len (bytevector-length buf)))
      (if (zero? (remainder byte-len 4))
          ;; Reading a unit as big-endian and writing it back as
          ;; little-endian reverses its four bytes.
          (do ((pos 0 (+ pos 4)))
              ((= pos byte-len))
            (bytevector-u32-set! buf pos
                                 (bytevector-u32-ref buf pos (endianness big))
                                 (endianness little)))
          (error "unexpected length"))))
  (define (link-text-object asm)
    "Link the .rtl-text section: gather the emitted code into a single
buffer, swap the endianness of its bytes if the target differs from the
host, and process relocations and labels."
    (let* ((buf (make-u32vector (asm-pos asm)))
           (block-bytes (* *block-size* 4))
           ;; Copy the completed blocks, oldest first.  The result is
           ;; the byte offset just past the last of them.
           (tail-pos (fold (lambda (block pos)
                             (bytevector-copy! block 0 buf pos block-bytes)
                             (+ pos block-bytes))
                           0
                           (reverse (asm-prev asm)))))
      ;; Then the part of the current block that is in use.
      (bytevector-copy! (asm-cur asm) 0 buf tail-pos (* (asm-idx asm) 4))
      (unless (eq? (asm-endianness asm) (native-endianness))
        (swap-bytes! buf))
      (make-object asm '.rtl-text
                   buf
                   (process-relocs buf (asm-relocs asm)
                                   (asm-labels asm))
                   (process-labels (asm-labels asm)))))
  1389. ;;;
  1390. ;;; Create the frame maps. These maps are used by GC to identify dead
  1391. ;;; slots in pending call frames, to avoid marking them. We only do
  1392. ;;; this when a frame makes a non-tail call, as that is the common case.
  1393. ;;; Only the topmost frame will see a GC at any other point, but we mark
  1394. ;;; top frames conservatively as serializing live slot maps at every
  1395. ;;; instruction would take up too much space in the object file.
  1396. ;;;
  1397. ;; The .guile.frame-maps section starts with two packed u32 values: one
  1398. ;; indicating the offset of the first byte of the .rtl-text section, and
  1399. ;; another indicating the relative offset in bytes of the slots data.
  ;; Two packed u32 values, 4 bytes each.
  (define frame-maps-prefix-len (* 2 4))
  1401. ;; Each header is 8 bytes: 4 for the offset from .rtl_text, and 4 for
  1402. ;; the offset of the slot map from the beginning of the
  1403. ;; .guile.frame-maps section. The length of a frame map depends on the
  1404. ;; frame size at the call site, and is not encoded into this section as
  1405. ;; it is available at run-time.
  ;; 4 bytes for the offset from the text section, plus 4 bytes for the
  ;; offset of the slot map.
  (define frame-map-header-len (+ 4 4))
  (define (link-frame-maps asm)
    ;; Build the .guile.frame-maps linker object from the dead slot maps
    ;; recorded in ASM, or return #f if none were recorded.

    ;; Number of bytes used for the map of a call site whose procedure
    ;; is in PROC-SLOT: one bit per slot, not counting two slots.
    (define (map-byte-length proc-slot)
      (ceiling-quotient (- proc-slot 2) 8))

    ;; MAPS is a list of (pos proc-slot . map) entries, COUNT is its
    ;; length, and MAP-LEN is the total size in bytes of the slot maps.
    (define (make-frame-maps maps count map-len)
      (let* ((endianness (asm-endianness asm))
             (headers-start frame-maps-prefix-len)
             (maps-start (+ headers-start (* count frame-map-header-len)))
             (bv (make-bytevector (+ maps-start map-len) 0)))
        ;; Write the low NBYTES bytes of BITS at START, least
        ;; significant byte first.  Returns the position after the last
        ;; byte written.
        (define (write-map! start bits nbytes)
          (let lp ((pos start) (bits bits) (remaining nbytes))
            (cond
             ((zero? remaining) pos)
             (else
              (bytevector-u8-set! bv pos (logand bits #xff))
              (lp (1+ pos) (ash bits -8) (1- remaining))))))
        ;; Second word of the prefix: where the slot maps begin.  The
        ;; first word is filled in through the relocation against
        ;; .rtl-text.
        (bytevector-u32-set! bv 4 maps-start endianness)
        (let lp ((maps maps) (header-pos headers-start) (map-pos maps-start))
          (match maps
            (()
             (make-object asm '.guile.frame-maps bv
                          (list (make-linker-reloc 'abs32/1 0 0 '.rtl-text))
                          '() #:type SHT_PROGBITS #:flags SHF_ALLOC))
            (((pos proc-slot . bits) . rest)
             (bytevector-u32-set! bv header-pos (* pos 4) endianness)
             (bytevector-u32-set! bv (+ header-pos 4) map-pos endianness)
             (let ((next-map-pos
                    (write-map! map-pos bits (map-byte-length proc-slot))))
               (lp rest
                   (+ header-pos frame-map-header-len)
                   next-map-pos)))))))

    (match (asm-dead-slot-maps asm)
      (() #f)
      (recorded
       ;; The entries were consed up most recent first; reverse them,
       ;; and count them and their bytes as we go.
       (let lp ((in recorded) (out '()) (count 0) (map-len 0))
         (match in
           (()
            (make-frame-maps out count map-len))
           (((and entry (pos proc-slot . bits)) . rest)
            (lp rest
                (cons entry out)
                (1+ count)
                (+ (map-byte-length proc-slot) map-len))))))))
  1444. ;;;
  1445. ;;; Linking other sections of the ELF file, like the dynamic segment,
  1446. ;;; the symbol table, etc.
  1447. ;;;
  1448. ;; FIXME: Define these somewhere central, shared with C.
  ;; The two version numbers are combined into a single word when the
  ;; dynamic section is written, with the major version shifted left
  ;; by 16 bits.
  (define *bytecode-major-version* (logior (ash #x02 8) #x02))
  (define *bytecode-minor-version* 6)
  (define (link-dynamic-section asm text rw rw-init frame-maps)
    "Link the dynamic section for an ELF image with bytecode @var{text},
given the writable data section @var{rw} needing fixup from the
procedure with label @var{rw-init}.  @var{rw-init} may be false.  If
@var{rw} is true, it will be added to the GC roots at runtime.  If
@var{frame-maps} is true, an entry referring to the .guile.frame-maps
section is added as well."
    (define-syntax-rule (emit-dynamic-section word-size %set-uword! reloc-type)
      (let* ((endianness (asm-endianness asm))
             ;; Every entry is a (tag, value) pair of words.  Three
             ;; entries are always present: the VM version, the entry
             ;; point, and the terminating DT_NULL.
             (entries (+ 3
                         (if rw 2 0)
                         (if rw-init 1 0)
                         (if frame-maps 1 0)))
             (bv (make-bytevector (* word-size 2 entries) 0))
             (relocs '())
             ;; Word index at which the next entry is written.
             (next 0)
             (set-uword!
              (lambda (i uword)
                (%set-uword! bv (* i word-size) uword endianness)))
             ;; A label is written as 0, with a relocation against it.
             (set-label!
              (lambda (i label)
                (set! relocs (cons (make-linker-reloc 'reloc-type
                                                      (* i word-size) 0 label)
                                   relocs))
                (%set-uword! bv (* i word-size) 0 endianness)))
             ;; Append one entry: TAG, then VALUE as stored by
             ;; SET-VALUE!, which is set-uword! or set-label!.
             ;; Appending at a running cursor means an optional entry
             ;; that is absent never leaves a zero-filled slot in the
             ;; middle of the table, which could be mistaken for the
             ;; terminating entry.
             (add-entry!
              (lambda (tag set-value! value)
                (set-uword! next tag)
                (set-value! (1+ next) value)
                (set! next (+ next 2)))))
        (add-entry! DT_GUILE_VM_VERSION set-uword!
                    (logior (ash *bytecode-major-version* 16)
                            *bytecode-minor-version*))
        (add-entry! DT_GUILE_ENTRY set-label! '.rtl-text)
        (when rw
          ;; Add roots to GC.
          (add-entry! DT_GUILE_GC_ROOT set-label! '.data)
          (add-entry! DT_GUILE_GC_ROOT_SZ set-uword!
                      (bytevector-length (linker-object-bv rw))))
        (when rw-init
          (add-entry! DT_INIT set-label! rw-init)) ; constants
        (when frame-maps
          (add-entry! DT_GUILE_FRAME_MAPS set-label! '.guile.frame-maps))
        (add-entry! DT_NULL set-uword! 0)
        (make-object asm '.dynamic bv relocs '()
                     #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
    (case (asm-word-size asm)
      ((4) (emit-dynamic-section 4 bytevector-u32-set! abs32/1))
      ((8) (emit-dynamic-section 8 bytevector-u64-set! abs64/1))
      (else (error "bad word size" asm))))
  (define (link-shstrtab asm)
    "Link the string table that holds the section header names."
    ;; The table's own name has to be interned before the table is
    ;; linked.
    (intern-section-name! asm ".shstrtab")
    (let ((table (link-string-table! (asm-shstrtab asm))))
      (make-object asm '.shstrtab
                   table
                   '() '()
                   #:type SHT_STRTAB #:flags 0)))
  (define (link-symtab text-section asm)
    ;; Link the symbol table, with one function symbol per program
    ;; recorded in ASM, all pointing into TEXT-SECTION.  Returns two
    ;; values: the .symtab object and the .strtab object that holds the
    ;; symbol names.
    (let* ((endianness (asm-endianness asm))
           (word-size (asm-word-size asm))
           (entry-size (elf-symbol-len word-size))
           (programs (reverse (asm-meta asm)))
           (count (length programs))
           (names (make-string-table))
           (bv (make-bytevector (* count entry-size) 0)))
      ;; Programs without a name get the empty string.
      (define (intern-string! name)
        (string-table-intern! names (if name (symbol->string name) "")))
      (let lp ((programs programs) (i 0))
        (unless (null? programs)
          (let* ((program (car programs))
                 (name (intern-string! (meta-name program))))
            (write-elf-symbol bv (* i entry-size) endianness word-size
                              (make-elf-symbol
                               #:name name
                               ;; Symbol value and size are measured in
                               ;; bytes, not u32s.
                               #:value (* 4 (meta-low-pc program))
                               #:size (* 4 (- (meta-high-pc program)
                                              (meta-low-pc program)))
                               #:type STT_FUNC
                               #:visibility STV_HIDDEN
                               #:shndx (elf-section-index text-section)))
            (lp (cdr programs) (1+ i)))))
      ;; The string table is made first, as the symbol table links to
      ;; its section index.
      (let ((strtab (make-object asm '.strtab
                                 (link-string-table! names)
                                 '() '()
                                 #:type SHT_STRTAB #:flags 0)))
        (values (make-object asm '.symtab
                             bv
                             '() '()
                             #:type SHT_SYMTAB #:flags 0 #:entsize entry-size
                             #:link (elf-section-index
                                     (linker-object-section strtab)))
                strtab))))
  1541. ;;; The .guile.arities section describes the arities that a function can
  1542. ;;; have. It is in two parts: a sorted array of headers describing
  1543. ;;; basic arities, and an array of links out to a string table (and in
  1544. ;;; the case of keyword arguments, to the data section) for argument
  1545. ;;; names. The whole thing is prefixed by a uint32 indicating the
  1546. ;;; offset of the end of the headers array.
  1547. ;;;
  1548. ;;; The arity headers array is a packed array of structures of the form:
  1549. ;;;
  1550. ;;; struct arity_header {
  1551. ;;; uint32_t low_pc;
  1552. ;;; uint32_t high_pc;
  1553. ;;; uint32_t offset;
  1554. ;;; uint32_t flags;
  1555. ;;; uint32_t nreq;
  1556. ;;; uint32_t nopt;
  1557. ;;; uint32_t nlocals;
  1558. ;;; }
  1559. ;;;
  1560. ;;; All of the offsets and addresses are 32 bits. We can expand in the
  1561. ;;; future to use 64-bit offsets if appropriate, but there are other
  1562. ;;; aspects of bytecode that constrain us to a total image that fits in
  1563. ;;; 32 bits, so for the moment we'll simplify the problem space.
  1564. ;;;
  1565. ;;; The following flags values are defined:
  1566. ;;;
  1567. ;;; #x1: has-rest?
  1568. ;;; #x2: allow-other-keys?
  1569. ;;; #x4: has-keyword-args?
  1570. ;;; #x8: is-case-lambda?
  1571. ;;; #x10: is-in-case-lambda?
  1572. ;;;
  1573. ;;; Functions with a single arity specify their number of required and
  1574. ;;; optional arguments in nreq and nopt, and do not have the
  1575. ;;; is-case-lambda? flag set. Their "offset" member links to an array
  1576. ;;; of pointers into the associated .guile.arities.strtab string table,
  1577. ;;; identifying the argument names. This offset is relative to the
  1578. ;;; start of the .guile.arities section.
  1579. ;;;
  1580. ;;; If the arity has keyword arguments -- if has-keyword-args? is set in
  1581. ;;; the flags -- the first uint32 pointed to by offset encodes a link to
  1582. ;;; the "keyword indices" literal, in the data section. Then follow the
  1583. ;;; names for all locals, in order, as uleb128 values. The required
  1584. ;;; arguments will be the first locals, followed by the optionals,
  1585. ;;; followed by the rest argument if has-rest? is set. The names
  1586. ;;; point into the associated string table section.
  1587. ;;;
  1588. ;;; Functions with no arities have no arities information present in the
  1589. ;;; .guile.arities section.
  1590. ;;;
  1591. ;;; Functions with multiple arities are preceded by a header with
  1592. ;;; is-case-lambda? set. All other fields are 0, except low-pc and
  1593. ;;; high-pc which should be the bounds of the whole function. Headers
  1594. ;;; for the individual arities follow, with the is-in-case-lambda? flag
  1595. ;;; set. In this way the whole headers array is sorted in increasing
  1596. ;;; low-pc order, and case-lambda clauses are contained within the
  1597. ;;; [low-pc, high-pc] of the case-lambda header.
  ;; Length of the prefix to the arities section, in bytes.  The prefix is
  ;; the single uint32 that link-arities writes at offset 0, holding the
  ;; length of the headers bytevector (prefix included).
  1598. ;; Length of the prefix to the arities section, in bytes.
  1599. (define arities-prefix-len 4)
  ;; Seven uint32 fields, matching what write-header stores: low_pc,
  ;; high_pc, offset, flags, nreq, nopt, nlocals.
  1600. ;; Length of an arity header, in bytes.
  1601. (define arity-header-len (* 7 4))
  1602. ;; Some helpers.
  (define (put-uleb128 port val)
    ;; Write VAL to the binary output PORT as an unsigned LEB128 number:
    ;; seven payload bits per byte, least-significant group first, with
    ;; the high bit set on every byte except the last.
    ;;
    ;; VAL must be a non-negative exact integer.  A negative value has no
    ;; ULEB128 encoding, and arithmetic right shift of a negative number
    ;; never reaches zero, so without this check the loop below would
    ;; write bytes forever.
    (unless (and (exact-integer? val) (>= val 0))
      (error "put-uleb128: expected a non-negative exact integer" val))
    (let lp ((val val))
      (let ((next (ash val -7)))
        (if (zero? next)
            ;; Final group: fits in seven bits, continuation bit clear.
            (put-u8 port val)
            (begin
              (put-u8 port (logior #x80 (logand val #x7f)))
              (lp next))))))
  (define (put-sleb128 port val)
    ;; Write the exact integer VAL to the binary output PORT as a signed
    ;; LEB128 number.  Seven bits are emitted per byte, low group first;
    ;; encoding stops once the remaining value fits in a signed 7-bit
    ;; field, i.e. lies in [-64, 63].
    (let emit ((rest val))
      (let ((low-bits (logand rest #x7f)))
        (cond
         ((<= 0 (+ rest 64) 127)
          ;; Last byte: continuation bit clear.
          (put-u8 port low-bits))
         (else
          (put-u8 port (logior #x80 low-bits))
          (emit (ash rest -7)))))))
  (define (port-position port)
    ;; Current offset of PORT, measured from its beginning.  `ftell' is
    ;; Guile's shorthand for (seek port 0 SEEK_CUR).
    (ftell port))
  ;; Pack the five arity booleans into the `flags' word of an arity
  ;; header.  Bit assignments:
  ;;   #x01 has-rest?           #x02 allow-other-keys?
  ;;   #x04 has-keyword-args?   #x08 is-case-lambda?
  ;;   #x10 is-in-case-lambda?
  (define-inline (pack-arity-flags has-rest? allow-other-keys?
                                   has-keyword-args? is-case-lambda?
                                   is-in-case-lambda?)
    (logior (if has-rest? #x01 0)
            (if allow-other-keys? #x02 0)
            (if has-keyword-args? #x04 0)
            (if is-case-lambda? #x08 0)
            (if is-in-case-lambda? #x10 0)))
  (define (write-arities asm metas headers names-port strtab)
    ;; Fill in the arity headers for every meta in METAS.
    ;;
    ;; Fixed-size headers are stored into the bytevector HEADERS, starting
    ;; just past the section prefix.  The variable-length data (keyword
    ;; index links, local names, local definitions) is appended to
    ;; NAMES-PORT, and local names are interned in STRTAB.
    ;;
    ;; Returns the list of linker relocations needed by the keyword-index
    ;; links.  Signals an error if HEADERS is not exactly filled.

    ;; Store one uint32 header field.
    (define (put-field! pos val)
      (bytevector-u32-set! headers pos val (asm-endianness asm)))

    (define (write-header pos low-pc high-pc offset flags nreq nopt nlocals)
      (unless (<= (+ nreq nopt) nlocals)
        (error "forgot to emit definition instructions?"))
      ;; PCs are counted in 32-bit words; the section stores byte offsets.
      (for-each (lambda (delta val)
                  (put-field! (+ pos delta) val))
                '(0 4 8 12 16 20 24)
                (list (* low-pc 4) (* high-pc 4) offset flags
                      nreq nopt nlocals)))

    (define (write-kw-indices kw-indices relocs)
      ;; FIXME: Assert that kw-indices is already interned.
      (cond
       ((not (pair? kw-indices)) relocs)
       (else
        (let* ((pos (+ (bytevector-length headers)
                       (port-position names-port)))
               (label (intern-constant asm kw-indices)))
          ;; Placeholder word, to be patched by the abs32/1 relocation.
          (put-bytevector names-port #vu8(0 0 0 0))
          (cons (make-linker-reloc 'abs32/1 pos 0 label) relocs)))))

    (define (write-arity pos arity in-case-lambda? relocs)
      (let* ((definitions (arity-definitions arity))
             (kw-indices (arity-kw-indices arity))
             ;; FIXME: Seems silly to add on bytevector-length of
             ;; headers, given the arities-prefix.
             (names-offset (+ (bytevector-length headers)
                              (port-position names-port)))
             (flags (pack-arity-flags (arity-rest arity)
                                      (arity-allow-other-keys? arity)
                                      (pair? kw-indices)
                                      #f
                                      in-case-lambda?)))
        (write-header pos
                      (arity-low-pc arity)
                      (arity-high-pc arity)
                      names-offset
                      flags
                      (length (arity-req arity))
                      (length (arity-opt arity))
                      (length definitions))
        (let ((relocs (write-kw-indices kw-indices relocs)))
          ;; Write local names; unnamed locals get string index 0.
          (for-each (match-lambda
                      (#(name slot def)
                       (put-uleb128 names-port
                                    (if (symbol? name)
                                        (string-table-intern!
                                         strtab (symbol->string name))
                                        0))))
                    definitions)
          ;; Now write their definitions.
          (for-each (match-lambda
                      (#(name slot def)
                       (put-uleb128 names-port def)
                       (put-uleb128 names-port slot)))
                    definitions)
          relocs)))

    ;; Write the header(s) for one meta starting at POS.  Returns two
    ;; values: the position after the last header written, and the
    ;; updated relocation list.
    (define (write-meta meta pos relocs)
      (match (meta-arities meta)
        (() (values pos relocs))
        ((arity)
         (values (+ pos arity-header-len)
                 (write-arity pos arity #f relocs)))
        (arities
         ;; Write a case-lambda header, then individual arities.
         ;; The case-lambda header's offset link is 0.
         (write-header pos (meta-low-pc meta) (meta-high-pc meta) 0
                       (pack-arity-flags #f #f #f #t #f) 0 0 0)
         (let clause-loop ((clauses arities)
                           (pos (+ pos arity-header-len))
                           (relocs relocs))
           (match clauses
             (() (values pos relocs))
             ((clause . clauses)
              (clause-loop clauses
                           (+ pos arity-header-len)
                           (write-arity pos clause #t relocs))))))))

    (let meta-loop ((metas metas) (pos arities-prefix-len) (relocs '()))
      (match metas
        (()
         (unless (= pos (bytevector-length headers))
           (error "expected to fully fill the bytevector"
                  pos (bytevector-length headers)))
         relocs)
        ((meta . metas)
         (let-values (((pos relocs) (write-meta meta pos relocs)))
           (meta-loop metas pos relocs))))))
  (define (link-arities asm)
    ;; Build the .guile.arities section and its string table from the
    ;; metas recorded in ASM.  Returns two values: the .guile.arities
    ;; linker object and the .guile.arities.strtab linker object.

    ;; Bytes of header space a meta needs: none without arities, one
    ;; header for a single arity, and one extra (the case-lambda header)
    ;; when there are several.
    (define (meta-arities-header-size meta)
      (let ((count (length (meta-arities meta))))
        (* arity-header-len
           (if (> count 1) (1+ count) count))))

    (define (bytevector-append a b)
      (let* ((a-len (bytevector-length a))
             (b-len (bytevector-length b))
             (out (make-bytevector (+ a-len b-len))))
        (bytevector-copy! a 0 out 0 a-len)
        (bytevector-copy! b 0 out a-len b-len)
        out))

    (let* ((metas (reverse (asm-meta asm)))
           (header-size (fold +
                              arities-prefix-len
                              (map meta-arities-header-size metas)))
           (strtab (make-string-table))
           (headers (make-bytevector header-size 0)))
      ;; Section prefix: offset of the end of the headers array.
      (bytevector-u32-set! headers 0 header-size (asm-endianness asm))
      (let-values (((names-port get-name-bv) (open-bytevector-output-port)))
        (let* ((relocs (write-arities asm metas headers names-port strtab))
               (strtab-object (make-object asm '.guile.arities.strtab
                                           (link-string-table! strtab)
                                           '() '()
                                           #:type SHT_STRTAB #:flags 0))
               (arities-object (make-object asm '.guile.arities
                                            (bytevector-append headers
                                                               (get-name-bv))
                                            relocs '()
                                            #:type SHT_PROGBITS #:flags 0
                                            #:link (elf-section-index
                                                    (linker-object-section
                                                     strtab-object)))))
          (values arities-object strtab-object)))))
  1748. ;;;
  1749. ;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
  1750. ;;; values. Pc and str are both 32 bits wide. (Either could change to
  1751. ;;; 64 bits if appropriate in the future.) Pc is the address of the
  1752. ;;; entry to a program, relative to the start of the text section, in
  1753. ;;; bytes, and str is an index into the associated .guile.docstrs.strtab
  1754. ;;; string table section.
  1755. ;;;
  ;; Each entry is two 32-bit words, as written by link-docstrs: the
  ;; procedure's byte pc, then the docstring's index in the string table.
  1756. ;; The size of a docstrs entry, in bytes.
  1757. (define docstr-size 8)
  (define (link-docstrs asm)
    ;; Build the .guile.docstrs section and its string table.  Returns
    ;; two values: the .guile.docstrs linker object and the
    ;; .guile.docstrs.strtab linker object.

    (define (is-documentation? pair)
      (eq? (car pair) 'documentation))

    ;; Return (byte-pc . docstring) for META, or #f unless META has
    ;; exactly one `documentation' property and its value is a string.
    (define (meta->docstring-entry meta)
      (match (find-tail is-documentation? (meta-properties meta))
        (#f #f)
        (((_ . doc) . rest)
         (and (not (find-tail is-documentation? rest))
              (string? doc)
              (cons (* 4 (meta-low-pc meta)) doc)))))

    (let* ((endianness (asm-endianness asm))
           (docstrings (filter-map meta->docstring-entry
                                   (reverse (asm-meta asm))))
           (strtab (make-string-table))
           (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
      ;; Pack the (pc, string-index) pairs back to back.
      (let write-entries ((entries docstrings) (pos 0))
        (match entries
          (() #t)
          (((pc . doc) . entries)
           (bytevector-u32-set! bv pos pc endianness)
           (bytevector-u32-set! bv (+ pos 4)
                                (string-table-intern! strtab doc)
                                endianness)
           (write-entries entries (+ pos docstr-size)))))
      (let* ((strtab-object (make-object asm '.guile.docstrs.strtab
                                         (link-string-table! strtab)
                                         '() '()
                                         #:type SHT_STRTAB #:flags 0))
             (docstrs-object (make-object asm '.guile.docstrs
                                          bv
                                          '() '()
                                          #:type SHT_PROGBITS #:flags 0
                                          #:link (elf-section-index
                                                  (linker-object-section
                                                   strtab-object)))))
        (values docstrs-object strtab-object))))
  1795. ;;;
  1796. ;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
  1797. ;;; values. Pc and addr are both 32 bits wide. (Either could change to
  1798. ;;; 64 bits if appropriate in the future.) Pc is the address of the
  1799. ;;; entry to a program, relative to the start of the text section, and
  1800. ;;; addr is the address of the associated properties alist, relative to
  1801. ;;; the start of the ELF image.
  1802. ;;;
  1803. ;;; Since procedure properties are stored in the data sections, we need
  1804. ;;; to link the procedures property section first. (Note that this
  1805. ;;; constraint does not apply to the arities section, which may
  1806. ;;; reference the data sections via the kw-indices literal, because
  1807. ;;; assembling the text section already makes sure that the kw-indices
  1808. ;;; are interned.)
  1809. ;;;
  ;; Each entry is two 32-bit words, as laid out by link-procprops: the
  ;; procedure's byte pc, then a slot left for an abs32/1 relocation to
  ;; the interned properties alist.
  1810. ;; The size of a procprops entry, in bytes.
  1811. (define procprops-size 8)
  (define (link-procprops asm)
    ;; Build the .guile.procprops section: for every meta that still has
    ;; properties once its name and string docstring are set aside, one
    ;; (pc, addr) entry whose addr is relocated to the interned alist.
    ;; Returns the .guile.procprops linker object.

    ;; Look at the first entry of ALIST whose key is KEY: drop it if
    ;; VALUE-PRED accepts its value, otherwise keep it.  Entries after
    ;; that first occurrence are returned untouched.
    (define (assoc-remove-one alist key value-pred)
      (match alist
        (() '())
        (((k . v) . rest)
         (cond
          ((not (eq? k key))
           (acons k v (assoc-remove-one rest key value-pred)))
          ((value-pred v) rest)
          (else (acons key v rest))))))

    (define (props-without-name-or-docstring meta)
      (let ((unnamed (assoc-remove-one (meta-properties meta)
                                       'name
                                       (lambda (x) #t))))
        (assoc-remove-one unnamed 'documentation string?)))

    (define (meta->procprops-entry meta)
      (let ((props (props-without-name-or-docstring meta)))
        (and (pair? props)
             (cons (* 4 (meta-low-pc meta)) props))))

    (let* ((endianness (asm-endianness asm))
           (procprops (filter-map meta->procprops-entry
                                  (reverse (asm-meta asm))))
           (bv (make-bytevector (* (length procprops) procprops-size) 0))
           (relocs
            (let emit ((entries procprops) (pos 0) (relocs '()))
              (match entries
                (() relocs)
                (((pc . props) . entries)
                 (bytevector-u32-set! bv pos pc endianness)
                 (emit entries
                       (+ pos procprops-size)
                       (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
                                                (intern-constant asm props))
                             relocs)))))))
      (make-object asm '.guile.procprops
                   bv
                   relocs '()
                   #:type SHT_PROGBITS #:flags 0)))
  1850. ;;;
  1851. ;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
  1852. ;;; sections provide line number and local variable liveness
  1853. ;;; information. Their format is defined by the DWARF
  1854. ;;; specifications.
  1855. ;;;
  ;; Return the source language to record in the DWARF compile unit.
  ;; ASM is currently ignored: the result is always the symbol `scheme'.
  1856. (define (asm-language asm)
  1857. ;; FIXME: Plumb language through to the assembler.
  1858. 'scheme)
  ;; Build the DWARF debugging sections for ASM.
  ;;
  ;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_line
  ;;
  ;; .debug_info holds one compile-unit DIE with a subprogram child per
  ;; meta; .debug_line holds a DWARF 2 line-number program built from the
  ;; source records of ASM; .debug_loc is emitted empty.
  (define (link-debug asm)
    ;; Fixed-width writers honouring the target endianness.
    (define (put-s8 port val)
      (let ((bv (make-bytevector 1)))
        (bytevector-s8-set! bv 0 val)
        (put-bytevector port bv)))
    (define (put-u16 port val)
      (let ((bv (make-bytevector 2)))
        (bytevector-u16-set! bv 0 val (asm-endianness asm))
        (put-bytevector port bv)))
    (define (put-u32 port val)
      (let ((bv (make-bytevector 4)))
        (bytevector-u32-set! bv 0 val (asm-endianness asm))
        (put-bytevector port bv)))
    (define (put-u64 port val)
      (let ((bv (make-bytevector 8)))
        (bytevector-u64-set! bv 0 val (asm-endianness asm))
        (put-bytevector port bv)))

    ;; DIEs are described as (TAG (@ (ATTR VAL) ...) CHILD ...).
    (define (meta->subprogram-die meta)
      `(subprogram
        (@ ,@(cond
              ((meta-name meta)
               => (lambda (name) `((name ,(symbol->string name)))))
              (else
               '()))
           (low-pc ,(meta-label meta))
           (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))

    (define (make-compile-unit-die asm)
      `(compile-unit
        (@ (producer ,(string-append "Guile " (version)))
           (language ,(asm-language asm))
           (low-pc .rtl-text)
           (high-pc ,(* 4 (asm-pos asm)))
           (stmt-list 0))
        ,@(map meta->subprogram-die (reverse (asm-meta asm)))))

    (let-values (((die-port get-die-bv) (open-bytevector-output-port))
                 ((die-relocs) '())
                 ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
                 ;; (tag has-kids? attrs forms) -> code
                 ((abbrevs) vlist-null)
                 ((strtab) (make-string-table))
                 ((line-port get-line-bv) (open-bytevector-output-port))
                 ((line-relocs) '())
                 ;; file -> code
                 ((files) vlist-null))

      (define (write-abbrev code tag has-children? attrs forms)
        (put-uleb128 abbrev-port code)
        (put-uleb128 abbrev-port (tag-name->code tag))
        (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
        (for-each (lambda (attr form)
                    (put-uleb128 abbrev-port (attribute-name->code attr))
                    (put-uleb128 abbrev-port (form-name->code form)))
                  attrs forms)
        ;; A (0, 0) attribute/form pair ends the abbreviation.
        (put-uleb128 abbrev-port 0)
        (put-uleb128 abbrev-port 0))

      ;; Return the abbreviation code for this DIE shape, writing a new
      ;; abbreviation the first time the shape is seen.  Codes start at 1.
      (define (intern-abbrev tag has-children? attrs forms)
        (let ((key (list tag has-children? attrs forms)))
          (match (vhash-assoc key abbrevs)
            ((_ . code) code)
            (#f (let ((code (1+ (vlist-length abbrevs))))
                  (set! abbrevs (vhash-cons key code abbrevs))
                  (write-abbrev code tag has-children? attrs forms)
                  code)))))

      ;; Return the 1-based file-table code for FILE, allocating one if
      ;; needed.
      (define (intern-file file)
        (match (vhash-assoc file files)
          ((_ . code) code)
          (#f (let ((code (1+ (vlist-length files))))
                (set! files (vhash-cons file code files))
                code))))

      (define (write-sources)
        ;; Choose line base and line range values that will allow for an
        ;; address advance range of 16 words.  The special opcode range is
        ;; from 10 to 255, so 246 values.
        (define base -4)
        (define range 15)
        (let lp ((sources (asm-sources asm)) (out '()))
          (match sources
            (((pc . s) . sources)
             (let ((file (assq-ref s 'filename))
                   (line (assq-ref s 'line))
                   (col (assq-ref s 'column)))
               (lp sources
                   ;; Guile line and column numbers are 0-indexed, but
                   ;; they are 1-indexed for DWARF.
                   (if (and line col)
                       (cons (list pc
                                   (if (string? file) (intern-file file) 0)
                                   (1+ line)
                                   (1+ col))
                             out)
                       out))))
            (()
             ;; Compilation unit header for .debug_line.  We write in
             ;; DWARF 2 format because more tools understand it than DWARF
             ;; 4, which incompatibly adds another field to this header.
             (put-u32 line-port 0) ; Length; will patch later.
             (put-u16 line-port 2) ; DWARF 2 format.
             (put-u32 line-port 0) ; Prologue length; will patch later.
             (put-u8 line-port 4) ; Minimum instruction length: 4 bytes.
             (put-u8 line-port 1) ; Default is-stmt: true.
             (put-s8 line-port base) ; Line base.  See the DWARF standard.
             (put-u8 line-port range) ; Line range.  See the DWARF standard.
             (put-u8 line-port 10) ; Opcode base: the first "special" opcode.

             ;; A table of the number of uleb128 arguments taken by each
             ;; of the standard opcodes.
             (put-u8 line-port 0) ; 1: copy
             (put-u8 line-port 1) ; 2: advance-pc
             (put-u8 line-port 1) ; 3: advance-line
             (put-u8 line-port 1) ; 4: set-file
             (put-u8 line-port 1) ; 5: set-column
             (put-u8 line-port 0) ; 6: negate-stmt
             (put-u8 line-port 0) ; 7: set-basic-block
             (put-u8 line-port 0) ; 8: const-add-pc
             (put-u8 line-port 1) ; 9: fixed-advance-pc

             ;; Include directories, as a zero-terminated sequence of
             ;; nul-terminated strings.  Nothing, for the moment.
             (put-u8 line-port 0)

             ;; File table.  For each file that contributes to this
             ;; compilation unit, a nul-terminated file name string, and a
             ;; uleb128 for each of directory the file was found in, the
             ;; modification time, and the file's size in bytes.  We pass
             ;; zero for the latter three fields.
             (vlist-fold-right
              (lambda (pair seed)
                (match pair
                  ((file . code)
                   (put-bytevector line-port (string->utf8 file))
                   (put-u8 line-port 0)
                   (put-uleb128 line-port 0) ; directory
                   (put-uleb128 line-port 0) ; mtime
                   (put-uleb128 line-port 0))) ; size
                seed)
              #f
              files)
             (put-u8 line-port 0) ; 0 byte terminating file list.

             ;; Patch prologue length.  The field sits at byte 6 and counts
             ;; the bytes that follow it, hence the 10 subtracted.
             (let ((offset (port-position line-port)))
               (seek line-port 6 SEEK_SET)
               (put-u32 line-port (- offset 10))
               (seek line-port offset SEEK_SET))

             ;; Now write the statement program.
             (let ()
               (define (extended-op opcode payload-len)
                 (put-u8 line-port 0) ; extended op
                 (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
                 (put-uleb128 line-port opcode))
               (define (set-address sym)
                 (define (add-reloc! kind)
                   (set! line-relocs
                         (cons (make-linker-reloc kind
                                                  (port-position line-port)
                                                  0
                                                  sym)
                               line-relocs)))
                 (match (asm-word-size asm)
                   (4
                    (extended-op 2 4)
                    (add-reloc! 'abs32/1)
                    (put-u32 line-port 0))
                   (8
                    (extended-op 2 8)
                    (add-reloc! 'abs64/1)
                    (put-u64 line-port 0))))
               (define (end-sequence pc)
                 (let ((pc-inc (- (asm-pos asm) pc)))
                   (put-u8 line-port 2) ; advance-pc
                   (put-uleb128 line-port pc-inc))
                 (extended-op 1 0))
               (define (advance-pc pc-inc line-inc)
                 (let ((spec (+ (- line-inc base) (* pc-inc range) 10)))
                   (cond
                    ((or (< line-inc base) (>= line-inc (+ base range)))
                     (advance-line line-inc)
                     (advance-pc pc-inc 0))
                    ((<= spec 255)
                     (put-u8 line-port spec))
                    ((< spec 500)
                     (put-u8 line-port 8) ; const-advance-pc
                     ;; Opcode 8 advances the address by as much as
                     ;; special opcode 255 would.  Use floor-quotient
                     ;; rather than floor/, which returns two values and
                     ;; only worked here through value truncation.
                     (advance-pc (- pc-inc (floor-quotient (- 255 10) range))
                                 line-inc))
                    (else
                     (put-u8 line-port 2) ; advance-pc
                     (put-uleb128 line-port pc-inc)
                     (advance-pc 0 line-inc)))))
               (define (advance-line inc)
                 (put-u8 line-port 3)
                 (put-sleb128 line-port inc))
               (define (set-file file)
                 (put-u8 line-port 4)
                 (put-uleb128 line-port file))
               (define (set-column col)
                 (put-u8 line-port 5)
                 (put-uleb128 line-port col))

               (set-address '.rtl-text)

               ;; The registers start at pc 0, file 1, line 1, column 0;
               ;; emit only what changes from one row to the next.
               (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
                 (match in
                   (()
                    (when (null? out)
                      ;; There was no source info in the first place.  Set
                      ;; file register to 0 before adding final row.
                      (set-file 0))
                    (end-sequence pc))
                   (((pc* file* line* col*) . in*)
                    (cond
                     ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
                      (lp in* pc file line col))
                     (else
                      (unless (eqv? col col*)
                        (set-column col*))
                      (unless (eqv? file file*)
                        (set-file file*))
                      (advance-pc (- pc* pc) (- line* line))
                      (lp in* pc* file* line* col*)))))))))))

      ;; Map an attribute value to what gets written: strings become
      ;; .debug_str offsets, the language becomes its DWARF code, and the
      ;; remaining attributes pass through unchanged.
      (define (compute-code attr val)
        (match attr
          ('name (string-table-intern! strtab val))
          ('low-pc val)
          ('high-pc val)
          ('producer (string-table-intern! strtab val))
          ('language (language-name->code val))
          ('stmt-list val)))

      ;; Pick the DWARF form used to encode an attribute value.
      (define (choose-form attr val code)
        (cond
         ((string? val) 'strp)
         ((eq? attr 'stmt-list) 'sec-offset)
         ((eq? attr 'low-pc) 'addr)
         ((exact-integer? code)
          (cond
           ((< code 0) 'sleb128)
           ((<= code #xff) 'data1)
           ((<= code #xffff) 'data2)
           ((<= code #xffffffff) 'data4)
           ((<= code #xffffffffffffffff) 'data8)
           (else 'uleb128)))
         (else (error "unhandled case" attr val code))))

      (define (add-die-relocation! kind sym)
        (set! die-relocs
              (cons (make-linker-reloc kind (port-position die-port) 0 sym)
                    die-relocs)))

      (define (write-value code form)
        (match form
          ('data1 (put-u8 die-port code))
          ('data2 (put-u16 die-port code))
          ('data4 (put-u32 die-port code))
          ('data8 (put-u64 die-port code))
          ('uleb128 (put-uleb128 die-port code))
          ('sleb128 (put-sleb128 die-port code))
          ('addr
           ;; CODE is a label here: write a zero word and record a
           ;; relocation against that label.
           (match (asm-word-size asm)
             (4
              (add-die-relocation! 'abs32/1 code)
              (put-u32 die-port 0))
             (8
              (add-die-relocation! 'abs64/1 code)
              (put-u64 die-port 0))))
          ('sec-offset (put-u32 die-port code))
          ('strp (put-u32 die-port code))))

      (define (write-die die)
        (match die
          ((tag ('@ (attrs vals) ...) children ...)
           (let* ((codes (map compute-code attrs vals))
                  (forms (map choose-form attrs vals codes))
                  (has-children? (not (null? children)))
                  (abbrev-code (intern-abbrev tag has-children? attrs forms)))
             (put-uleb128 die-port abbrev-code)
             (for-each write-value codes forms)
             (when has-children?
               (for-each write-die children)
               ;; A zero abbreviation code ends the list of children.
               (put-uleb128 die-port 0))))))

      ;; Compilation unit header.
      (put-u32 die-port 0) ; Length; will patch later.
      (put-u16 die-port 4) ; DWARF 4.
      (put-u32 die-port 0) ; Abbrevs offset.
      (put-u8 die-port (asm-word-size asm)) ; Address size.

      (write-die (make-compile-unit-die asm))

      ;; Terminate the abbrevs list.
      (put-uleb128 abbrev-port 0)

      (write-sources)

      (values (let ((bv (get-die-bv)))
                ;; Patch DWARF32 length.
                (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
                                     (asm-endianness asm))
                (make-object asm '.debug_info bv die-relocs '()
                             #:type SHT_PROGBITS #:flags 0))
              (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
                           #:type SHT_PROGBITS #:flags 0)
              (make-object asm '.debug_str (link-string-table! strtab) '() '()
                           #:type SHT_PROGBITS #:flags 0)
              (make-object asm '.debug_loc #vu8() '() '()
                           #:type SHT_PROGBITS #:flags 0)
              (let ((bv (get-line-bv)))
                ;; Patch DWARF32 length.
                (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
                                     (asm-endianness asm))
                (make-object asm '.debug_line bv line-relocs '()
                             #:type SHT_PROGBITS #:flags 0)))))
  ;; Link every section of the image for ASM and return them as a list of
  ;; linker objects.  Sections are linked one after another in the order
  ;; bound below; the inline comments give the ordering constraints.  Any
  ;; entry that turned out to be #f is dropped from the result.
  2155. (define (link-objects asm)
  2156. (let*-values (;; Link procprops before constants, because it probably
  2157. ;; interns more constants.
  2158. ((procprops) (link-procprops asm))
  2159. ((ro rw rw-init) (link-constants asm))
  2160. ;; Link text object after constants, so that the
  2161. ;; constants initializer gets included.
  2162. ((text) (link-text-object asm))
  2163. ((frame-maps) (link-frame-maps asm))
  2164. ((dt) (link-dynamic-section asm text rw rw-init frame-maps))
  2165. ((symtab strtab) (link-symtab (linker-object-section text) asm))
  2166. ((arities arities-strtab) (link-arities asm))
  2167. ((docstrs docstrs-strtab) (link-docstrs asm))
  2168. ((dinfo dabbrev dstrtab dloc dline) (link-debug asm))
  2169. ;; This needs to be linked last, because linking other
  2170. ;; sections adds entries to the string table.
  2171. ((shstrtab) (link-shstrtab asm)))
  ;; The order of this list differs from the linking order above;
  ;; rw-init is not listed as a separate object.
  2172. (filter identity
  2173. (list text ro frame-maps rw dt symtab strtab
  2174. arities arities-strtab
  2175. docstrs docstrs-strtab procprops
  2176. dinfo dabbrev dstrtab dloc dline
  2177. shstrtab))))
  2178. ;;;
  2179. ;;; High-level public interfaces.
  2180. ;;;
  (define* (link-assembly asm #:key (page-aligned? #t))
    "Produce an ELF image from the code and data emitted into @var{asm}.
The result is a bytevector, by default linked so that read-only and
writable data are on separate pages. Pass @code{#:page-aligned? #f} to
disable this behavior."
    ;; Link the individual sections first, then lay them out as ELF.
    (let ((objects (link-objects asm)))
      (link-elf objects #:page-aligned? page-aligned?)))