os-string.scm 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. ; Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees. See file COPYING.
  2. ; You may think that file names / environment variables / user names
  3. ; etc. are just text, but on most platforms, that assumption is wrong:
  4. ; They are usually NUL-terminated byte strings in some format. The
  5. ; bytes are invariant, but the corresponding text may depend on the
  6. ; locale. Also, byte sequences without a textual representation are
  7. ; possible.
  8. ; We assume that OS strings are encoded in some conservative extension
  9. ; of NUL-terminated ASCII. On Unix, this assumption pretty much has
  10. ; to hold true because of the various constraints of locale handling
  11. ; there. The Windows API uses an extension of UTF-16 that includes
  12. ; unpaired surrogates. For this, we use a synthetic extension of
  13. ; UTF-8 called UTF-8of16 that also deals with unpaired surrogates.
  14. ; #### lossiness
  15. (define-record-type os-string :os-string
  16. (really-make-os-string text-codec string byte-vector)
  17. os-string?
  18. (text-codec os-string-text-codec)
  19. ; may be #f, will get cached value
  20. (string os-string-string set-os-string-string!)
  21. ; may be #f, will get cached value
  22. (byte-vector os-string-byte-vector set-os-string-byte-vector!))
  23. (define-record-discloser :os-string
  24. (lambda (oss)
  25. (list "OS-string"
  26. (text-codec-names (os-string-text-codec oss))
  27. (os-string->string oss))))
  28. (define *initial-os-string-text-codec* #f)
  29. (define (initialize-os-string-text-codec!)
  30. (set! *initial-os-string-text-codec*
  31. (or (find-text-codec
  32. (system-parameter (enum system-parameter-option os-string-encoding)))
  33. us-ascii-codec)))
  34. (define $os-string-text-codec
  35. (make-fluid
  36. (lambda () *initial-os-string-text-codec*)))
  37. (define (current-os-string-text-codec)
  38. ((fluid $os-string-text-codec)))
  39. (define (call-with-os-string-text-codec codec thunk)
  40. (let-fluid $os-string-text-codec (lambda () codec)
  41. thunk))
  42. (define (make-os-string codec thing)
  43. (call-with-values
  44. (lambda ()
  45. (cond
  46. ((string? thing)
  47. (values (make-immutable! thing) #f))
  48. ((byte-vector? thing)
  49. (values #f (make-immutable! (byte-vector-copy-z thing))))
  50. (else
  51. (assertion-violation 'make-os-string "invalid argument" thing))))
  52. (lambda (str bv)
  53. (really-make-os-string codec str bv))))
  54. (define (string->os-string s)
  55. (let ((c (string-copy s)))
  56. (make-immutable! c)
  57. (really-make-os-string (current-os-string-text-codec)
  58. c #f)))
  59. (define (byte-vector->os-string b)
  60. (let ((c (byte-vector-copy-z b)))
  61. (make-immutable! b)
  62. (really-make-os-string (current-os-string-text-codec)
  63. #f c)))
  64. (define (os-string->byte-vector oss)
  65. (or (os-string-byte-vector oss)
  66. (let* ((string (os-string-string oss))
  67. (codec (os-string-text-codec oss))
  68. (size (string-encoding-length codec
  69. string
  70. 0
  71. (string-length string)))
  72. (bytes (make-byte-vector (+ size 1) 0))) ; NUL termination
  73. (encode-string codec
  74. string 0 (string-length string)
  75. bytes 0 size)
  76. (set-os-string-byte-vector! oss bytes)
  77. (make-immutable! bytes)
  78. bytes)))
  79. (define (os-string->string oss)
  80. (or (os-string-string oss)
  81. (let* ((bytes (os-string-byte-vector oss))
  82. (size (- (byte-vector-length bytes) 1))
  83. (codec (os-string-text-codec oss)))
  84. (call-with-values
  85. (lambda ()
  86. (bytes-string-size codec bytes 0 size #f))
  87. (lambda (status consumed-count decoded-count)
  88. (let ((string (make-string decoded-count)))
  89. (decode-string codec bytes 0 size
  90. string 0 decoded-count
  91. #\?)
  92. (set-os-string-string! oss string)
  93. (make-immutable! string)
  94. string))))))
  95. (define (x->os-string x)
  96. (cond
  97. ((os-string? x) x)
  98. ((string? x) (string->os-string x))
  99. ((byte-vector? x) (byte-vector->os-string x))))
  100. (define (os-string=? os1 os2)
  101. (byte-vector=? (os-string->byte-vector os1) (os-string->byte-vector os2)))
  102. ; frequent idioms
  103. (define (string->os-byte-vector s)
  104. (os-string->byte-vector (string->os-string s)))
  105. (define (x->os-byte-vector x)
  106. (os-string->byte-vector (x->os-string x)))
  107. ; Utilities
  108. (define (byte-vector-copy-z b)
  109. (let* ((size-old (byte-vector-length b))
  110. (nul? (and (positive? size-old)
  111. (zero? (byte-vector-ref b (- size-old 1)))))
  112. (size (if nul? size-old (+ 1 size-old)))
  113. (result (make-byte-vector size 0)))
  114. (copy-bytes! b 0 result 0 size-old)
  115. result))
  116. ; Initialization
  117. (initialize-os-string-text-codec!)