rng-uri.el 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. ;;; rng-uri.el --- URI parsing and manipulation
  2. ;; Copyright (C) 2003, 2007-2012 Free Software Foundation, Inc.
  3. ;; Author: James Clark
  4. ;; Keywords: XML
  5. ;; This file is part of GNU Emacs.
  6. ;; GNU Emacs is free software: you can redistribute it and/or modify
  7. ;; it under the terms of the GNU General Public License as published by
  8. ;; the Free Software Foundation, either version 3 of the License, or
  9. ;; (at your option) any later version.
  10. ;; GNU Emacs is distributed in the hope that it will be useful,
  11. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ;; GNU General Public License for more details.
  14. ;; You should have received a copy of the GNU General Public License
  15. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  16. ;;; Commentary:
  17. ;;; Code:
  18. (defun rng-file-name-uri (f)
  19. "Return a URI for the filename F.
  20. Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to
  21. escape them using %HH."
  22. (setq f (expand-file-name f))
  23. (let ((url
  24. (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]"
  25. 'rng-percent-encode
  26. f)))
  27. (concat "file:"
  28. (if (and (> (length url) 0)
  29. (= (aref url 0) ?/))
  30. "//"
  31. "///")
  32. url)))
  33. (defun rng-uri-escape-multibyte (uri)
  34. "Escape multibyte characters in URI."
  35. (replace-regexp-in-string "[:nonascii:]"
  36. 'rng-percent-encode
  37. (encode-coding-string uri 'utf-8)))
  38. (defun rng-percent-encode (str)
  39. (apply 'concat
  40. (mapcar (lambda (ch)
  41. (format "%%%x%x" (/ ch 16) (% ch 16)))
  42. (string-to-list str))))
  43. (defun rng-uri-file-name (uri)
  44. "Return the filename represented by a URI.
  45. Signal an error if URI is not a valid file URL."
  46. (rng-uri-file-name-1 uri nil))
  47. (defun rng-uri-pattern-file-name-regexp (pattern)
  48. "Return a regexp for filenames represented by URIs that match PATTERN."
  49. (rng-uri-file-name-1 pattern 'match))
  50. (defun rng-uri-pattern-file-name-replace-match (pattern)
  51. (rng-uri-file-name-1 pattern 'replace))
  52. ;; pattern is either nil or match or replace
  53. (defun rng-uri-file-name-1 (uri pattern)
  54. (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri)
  55. (rng-uri-error "Bad escapes in URI `%s'" uri))
  56. (setq uri (rng-uri-unescape-multibyte uri))
  57. (let* ((components
  58. (or (rng-uri-split uri)
  59. (rng-uri-error "Cannot split URI `%s' into its components" uri)))
  60. (scheme (nth 0 components))
  61. (authority (nth 1 components))
  62. (path (nth 2 components))
  63. (absolutep (string-match "\\`/" path))
  64. (query (nth 3 components))
  65. (fragment-id (nth 4 components)))
  66. (cond ((not scheme)
  67. (unless pattern
  68. (rng-uri-error "URI `%s' does not have a scheme" uri)))
  69. ((not (string= (downcase scheme) "file"))
  70. (rng-uri-error "URI `%s' does not use the `file:' scheme" uri)))
  71. (when (not (member authority
  72. (cons system-name '(nil "" "localhost"))))
  73. (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'"
  74. uri))
  75. (when query
  76. (rng-uri-error "`?' not escaped in file URI `%s'" uri))
  77. (when fragment-id
  78. (rng-uri-error "URI `%s' has a fragment identifier" uri))
  79. (when (string-match ";" path)
  80. (rng-uri-error "`;' not escaped in URI `%s'" uri))
  81. (when (string-match "%2[fF]" path) ;; 2f is hex code of slash
  82. (rng-uri-error "Escaped slash in URI `%s'" uri))
  83. (when (and (eq system-type 'windows-nt)
  84. absolutep
  85. (file-name-absolute-p (substring path 1)))
  86. (setq path (substring path 1)))
  87. (when (and pattern (string-match "\\`\\./" path))
  88. (setq path (substring path 2)))
  89. (setq path
  90. (cond ((eq pattern 'match)
  91. (rng-uri-unescape-unibyte-match path))
  92. ((eq pattern 'replace)
  93. (rng-uri-unescape-unibyte-replace path 2))
  94. (t
  95. (rng-uri-unescape-unibyte path))))
  96. (when (string-match "\000" path)
  97. (rng-uri-error "URI `%s' has NUL character in path" uri))
  98. (when (eq pattern 'match)
  99. (setq path
  100. (concat (if absolutep
  101. "\\(\\)"
  102. "\\(\\(?:[^/]*/\\)*\\)")
  103. path)))
  104. (cond ((eq pattern 'match)
  105. (concat "\\`" path "\\'"))
  106. ((and (eq pattern 'replace)
  107. (not absolutep))
  108. (concat "\\1" path))
  109. (t path))))
  110. (defun rng-uri-error (&rest args)
  111. (signal 'rng-uri-error (list (apply 'format args))))
  112. (put 'rng-uri-error 'error-conditions '(error rng-uri-error))
  113. (put 'rng-uri-error 'error-message "Invalid URI")
  114. (defun rng-uri-split (str)
  115. (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\
  116. \\(?://\\([^/?#]*\\)\\)?\
  117. \\([^?#]*\\)\
  118. \\(?:\\?\\([^#]*\\)\\)?\
  119. \\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'"
  120. str)
  121. (list (match-string 1 str)
  122. (match-string 2 str)
  123. (match-string 3 str)
  124. (match-string 4 str)
  125. (match-string 5 str))))
  126. (defun rng-uri-join (scheme authority path &optional query fragment-id)
  127. (when path
  128. (let (parts)
  129. (when fragment-id
  130. (setq parts (list "#" fragment-id)))
  131. (when query
  132. (setq parts
  133. (cons "?"
  134. (cons query parts))))
  135. (setq parts (cons path parts))
  136. (when authority
  137. (setq parts
  138. (cons "//"
  139. (cons authority parts))))
  140. (when scheme
  141. (setq parts
  142. (cons scheme
  143. (cons ":" parts))))
  144. (apply 'concat parts))))
  145. (defun rng-uri-resolve (uri-ref base-uri)
  146. "Resolve a possibly relative URI reference into absolute form.
  147. URI-REF is the URI reference to be resolved.
  148. BASE-URI is the base URI to use for resolving it.
  149. The algorithm is specified by RFC 2396.
  150. If there is some problem with URI-REF or BASE-URI, then
  151. URI-REF will be returned."
  152. (let* ((components (rng-uri-split uri-ref))
  153. (scheme (nth 0 components))
  154. (authority (nth 1 components))
  155. (path (nth 2 components))
  156. (query (nth 3 components))
  157. (fragment-id (nth 4 components))
  158. (base-components (rng-uri-split base-uri)))
  159. (if (or (not components)
  160. scheme
  161. (not base-components)
  162. (not (nth 0 base-components)))
  163. uri-ref
  164. (setq scheme (nth 0 base-components))
  165. (when (not authority)
  166. (setq authority (nth 1 base-components))
  167. (if (and (equal path "") (not query))
  168. ;; Handle same document reference by returning
  169. ;; same URI (RFC 2396bis does this too).
  170. (setq path (nth 2 base-components)
  171. query (nth 3 base-components))
  172. (setq path (rng-resolve-path path (nth 2 base-components)))))
  173. (rng-uri-join scheme
  174. authority
  175. path
  176. query
  177. fragment-id))))
  178. ;; See RFC 2396 5.2, steps 5 and 6
  179. (defun rng-resolve-path (path base-path)
  180. ;; Step 5
  181. (if (or (string-match "\\`/" path)
  182. (not (string-match "\\`/" base-path)))
  183. path
  184. ;; Step 6
  185. ;; (a), (b)
  186. (let ((segments (rng-split-path path))
  187. (base-segments (rng-split-path base-path)))
  188. (if (> (length base-segments) 1)
  189. (setq segments (nconc (nbutlast base-segments)
  190. segments))
  191. (setcar segments
  192. (concat (car base-segments) (car segments))))
  193. ;; (d)
  194. (let ((last-segment (last segments)))
  195. (when (equal (car last-segment) ".")
  196. (setcar last-segment "")))
  197. ;; (c)
  198. (setq segments (delete "." segments))
  199. ;; (e)
  200. (let (iter matched)
  201. (while (progn
  202. (setq matched nil)
  203. (setq iter (cdr segments))
  204. (while (and iter (not matched))
  205. (if (or (not (equal (cadr iter) ".."))
  206. (equal (car iter) ".."))
  207. (setq iter (cdr iter))
  208. (setcar iter nil)
  209. (setcar (cdr iter)
  210. ;; (f)
  211. (if (cddr iter) nil ""))
  212. (setq matched t)
  213. (setq segments (delq nil segments))))
  214. matched)))
  215. (rng-join-path segments))))
  216. (defun rng-relative-uri (full base)
  217. "Return a URI that relative to BASE is equivalent to FULL.
  218. The returned URI will be relative if possible.
  219. Both FULL and BASE must be absolute URIs."
  220. (let* ((components (rng-uri-split full))
  221. (scheme (nth 0 components))
  222. (authority (nth 1 components))
  223. (path (nth 2 components))
  224. (query (nth 3 components))
  225. (fragment-id (nth 4 components))
  226. (base-components (rng-uri-split base)))
  227. (if (and components
  228. base-components
  229. scheme
  230. (equal scheme
  231. (nth 0 base-components)))
  232. (progn
  233. (setq scheme nil)
  234. (when (and authority
  235. (equal authority
  236. (nth 1 base-components)))
  237. (setq authority nil)
  238. (setq path (rng-relative-path path (nth 2 base-components))))
  239. (rng-uri-join scheme authority path query fragment-id))
  240. full)))
  241. (defun rng-relative-path (path base-path)
  242. (let ((segments (rng-split-path path))
  243. (base-segments (rng-split-path base-path)))
  244. (when (> (length base-segments) 1)
  245. (setq base-segments (nbutlast base-segments)))
  246. (if (or (member "." segments)
  247. (member ".." segments)
  248. (member "." base-segments)
  249. (member ".." base-segments))
  250. path
  251. (while (and segments
  252. base-segments
  253. (string= (car segments)
  254. (car base-segments)))
  255. (setq segments (cdr segments))
  256. (setq base-segments (cdr base-segments)))
  257. (while base-segments
  258. (setq base-segments (cdr base-segments))
  259. (setq segments (cons ".." segments)))
  260. (when (equal (car segments) "")
  261. (setq segments (cons "." segments)))
  262. (rng-join-path segments))))
  263. (defun rng-split-path (path)
  264. (let ((start 0)
  265. segments)
  266. (while (string-match "/" path start)
  267. (setq segments (cons (substring path start (match-beginning 0))
  268. segments))
  269. (setq start (match-end 0)))
  270. (nreverse (cons (substring path start) segments))))
  271. (defun rng-join-path (segments)
  272. (and segments
  273. (mapconcat 'identity segments "/")))
  274. (defun rng-uri-unescape-multibyte (str)
  275. (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+"
  276. 'rng-multibyte-percent-decode
  277. str))
  278. (defun rng-multibyte-percent-decode (str)
  279. (decode-coding-string (apply 'string
  280. (mapcar (lambda (h) (string-to-number h 16))
  281. (split-string str "%")))
  282. 'utf-8))
  283. (defun rng-uri-unescape-unibyte (str)
  284. (replace-regexp-in-string "%[0-7][0-9a-fA-F]"
  285. (lambda (h)
  286. (string-to-number (substring h 1) 16))
  287. str
  288. t
  289. t))
  290. (defun rng-uri-unescape-unibyte-match (str)
  291. (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]"
  292. (lambda (match)
  293. (if (string= match "*")
  294. "\\([^/]*\\)"
  295. (regexp-quote
  296. (if (= (length match) 1)
  297. match
  298. (string-to-number (substring match 1)
  299. 16)))))
  300. str
  301. t
  302. t))
  303. (defun rng-uri-unescape-unibyte-replace (str next-match-index)
  304. (replace-regexp-in-string
  305. "%[0-7][0-9a-fA-F]\\|[^%]"
  306. (lambda (match)
  307. (if (string= match "*")
  308. (let ((n next-match-index))
  309. (setq next-match-index (1+ n))
  310. (format "\\%s" n))
  311. (let ((ch (if (= (length match) 1)
  312. (aref match 0)
  313. (string-to-number (substring match 1)
  314. 16))))
  315. (if (eq ch ?\\)
  316. (string ?\\ ?\\)
  317. (string ch)))))
  318. str
  319. t
  320. t))
  321. (provide 'rng-uri)
  322. ;;; rng-uri.el ends here