dino-chat-export.sh 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. #!/bin/sh
  2. #―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
  3. # Name: dino-chat-exporter
  4. # Desc: Export all conversations from Dino (XMPP client)'s database into
  5. # textual format
  6. # Reqs: shell, sqlite3
  7. # Lisc: GPLv3+
  8. # Auth: jadedctrl <jadedctrl@posteo.at>
  9. # Date: 2022-10
  10. #―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
  # Runs the query $2 against the SQLite database file $1 with `sqlite3`,
  # printing the result rows to stdout.
  # If the first attempt fails, a notice is printed to stderr and the query is
  # retried once after a one-second pause.
  # Returns the exit status of the last `sqlite3` attempt, so callers can tell
  # a failed query apart from an empty result.
  sqlite() {
    local database="$1"
    local query="$2"
    local status=0

    sqlite3 "$database" "$query" && return 0

    >&2 printf "sqlite errored out! Let's try again in a moment…"
    sleep 1
    sqlite3 "$database" "$query"
    status="$?"
    if test "$status" -ne 0; then
      >&2 printf "\t… well that didn't work. Oh, well.\n"
    else
      >&2 printf "\t… hey, that worked!\n"
    fi
    # Without this, the function would report the status of the printf above
    return "$status"
  }
  # Prints the internal ID of every row in the `account` table of $DB_FILE,
  # one per line.
  account_list() {
    local query="SELECT id
      FROM account;"

    sqlite "$DB_FILE" "$query"
  }
  # Prints the distinct counterpart (contact) IDs that appear in the messages
  # of the account with internal ID $1, one per line.
  conversation_partners() {
    local account_id="$1"
    local query="SELECT DISTINCT counterpart_id
      FROM message
      WHERE account_id == $account_id;"

    sqlite "$DB_FILE" "$query"
  }
  # Prints a suitable file extension (without the dot) for the file at $1,
  # as guessed by `file`.
  # HTML is detected through its MIME type, since `file --extension` does not
  # name an extension for HTML nor for plain text. Anything `file` cannot name
  # ("???" or no answer at all) is treated as plain text ("txt").
  file_extension() {
    local file="$1"
    local extension

    if file --brief --mime "$file" | grep -q "text/html"; then
      echo "html"
      return
    fi

    # `file` may list several candidates ("jpeg/jpg/jpe"); keep the first.
    # Short options are used for `cut`, as the long ones are GNU-only.
    extension="$(file --brief --extension "$file" | cut -d '/' -f 1)"
    case "$extension" in
      ''|'???') echo "txt" ;;
      *) echo "$extension" ;;
    esac
  }
  # Prints the jid.id belonging to the account with internal ID $1
  # (that is, account.id → jid.id, matched through the bare JID).
  # The answer is kept in the global $YOUR_JID_ID and reused while that
  # variable is non-empty, to avoid repeating the database query. Note that
  # the assignment only outlives the call when this function is run in the
  # current shell rather than inside $(…).
  account_jid_id() {
    local account_id="$1"

    test -n "$YOUR_JID_ID" || YOUR_JID_ID="$(sqlite "$DB_FILE" \
      "SELECT jid.id
       FROM account, jid
       WHERE account.id == $account_id
       AND account.bare_jid == jid.bare_jid;")"

    echo "$YOUR_JID_ID"
  }
  # Prints two lines for the account with internal ID $1: its XMPP address
  # (bare JID), then its alias.
  # The answer is kept in the global $YOUR_INFO and reused while that variable
  # is non-empty, so we're not making a million database queries.
  account_jid_and_nick() {
    local account_id="$1"

    if test -z "$YOUR_INFO"; then
      # PRINTF() rather than FORMAT(): SQLite documents printf() as the
      # original name of format(), kept as an alias, and it matches the
      # PRINTF() calls used for message bodies elsewhere in this script.
      YOUR_INFO="$(sqlite "$DB_FILE" \
        "SELECT PRINTF('%s' || char(10) || '%s',
           bare_jid,
           alias)
         FROM account
         WHERE id == $account_id;")"
    fi
    echo "$YOUR_INFO"
  }
  # Prints two lines for the user with jid.id $1: their XMPP address (bare
  # JID), then their roster name. When the roster holds no name for them, the
  # address is printed on both lines.
  # The answer is kept in the global $THEIR_INFO and reused while that
  # variable is non-empty, to spare the database repeated queries.
  id_jid_and_nick() {
    local internal_id="$1"
    local nick
    local jid

    # Cached already: nothing to look up
    if test -n "$THEIR_INFO"; then
      echo "$THEIR_INFO"
      return
    fi

    nick="$(sqlite "$DB_FILE" \
      "SELECT
       CASE
       WHEN roster.name IS NOT NULL
       THEN roster.name
       END
       FROM roster, jid
       WHERE roster.jid == jid.bare_jid AND jid.id == $internal_id;")"
    jid="$(sqlite "$DB_FILE" \
      "SELECT bare_jid
       FROM jid
       WHERE jid.id == $internal_id;")"

    # An absent nickname falls back to the address itself
    THEIR_INFO="$(printf '%s\n%s\n' "$jid" "${nick:-$jid}")"
    echo "$THEIR_INFO"
  }
  # Archives a full conversation with a user: attached files and avatars go
  # into "$3/files", the messages into a file named after the stem
  # "$3/messages".
  #   $1  internal ID of the account
  #   $2  internal ID of the conversation partner
  #   $3  output directory (created if missing)
  # Exits the script with status 2 if the output directory cannot be created.
  archive_conversation_with_partner() {
    local account_id="$1"
    local partner_id="$2"
    local output_dir="$3"

    mkdir -p "$output_dir"
    if test ! -d "$output_dir"; then
      # A diagnostic, not output: keep it off stdout
      >&2 echo "$output_dir isn't a valid directory"
      exit 2
    fi

    # Files come first: archiving them also settles the avatar paths
    # ($THEIR_AVATAR, $YOUR_AVATAR) that the message output may refer to.
    archive_files_with_partner "$account_id" "$partner_id" "$output_dir/files"
    archive_messages_with_partner "$account_id" "$partner_id" "$output_dir/messages"
  }
  # Writes every message exchanged with a partner into a file named after the
  # stem $3, then appends whichever extension file_extension picks for the
  # finished file (so "messages" may become "messages.txt" or "messages.html").
  #   $1  internal ID of the account
  #   $2  internal ID of the conversation partner
  #   $3  output path, without extension
  archive_messages_with_partner() {
    local account_id="$1"
    local partner_id="$2"
    local output_stem="$3"
    local extension

    output_messages_with_partner "$account_id" "$partner_id" > "$output_stem"

    # The extension can only be guessed once the content has been written
    extension="$(file_extension "$output_stem")"
    mv "$output_stem" "$output_stem.$extension"
  }
  # Archives all (currently known/downloaded) files and avatars between you
  # and a partner into the directory $3.
  #   $1  internal ID of the account
  #   $2  internal ID of the conversation partner
  #   $3  output directory (created if missing)
  # Sets the globals $THEIR_AVATAR and $YOUR_AVATAR to the first archived
  # avatar of each side, or to a placeholder path when there is none.
  # Returns 1 if the output directory cannot be created.
  archive_files_with_partner() {
    local account_id="$1"
    local partner_id="$2"
    local output_dir="$3"
    local files
    local path
    # Split lists on line breaks only, since paths may contain spaces.
    # (The "_" guards the newline from being stripped by the $(…).)
    local IFS
    IFS="$(printf '\n_')"
    IFS="${IFS%_}"

    mkdir -p "$output_dir"
    if test ! -d "$output_dir"; then
      >&2 echo "$output_dir isn't a valid directory"
      return 1
    fi

    THEIR_AVATAR="$(archive_avatars "$account_id" "$partner_id" "$output_dir/avatar" | head -1)"
    YOUR_AVATAR="$(archive_avatars "$account_id" "$(account_jid_id "$account_id")" "$output_dir/your_avatar" | head -1)"
    : "${THEIR_AVATAR:=files/their_avatar.png}"
    : "${YOUR_AVATAR:=files/your_avatar.png}"

    files="$(sqlite "$DB_FILE" \
      "SELECT path
       FROM file_transfer
       WHERE counterpart_id == $partner_id AND account_id == $account_id;")"

    # Read line by line instead of looping over an unquoted $files: names such
    # as "photo[1].jpg" must be taken literally, not expanded as glob patterns.
    printf '%s\n' "$files" | while IFS= read -r path; do
      test -n "$path" || continue
      cp "$DINO_HOME/files/$path" "$output_dir/$path"
    done
  }
  # Archives the avatars of a user, according to a stem
  # ("./files/avatar" becomes "./files/avatar.png", "./files/avatar1.png"…)
  #   $1  internal ID of the account
  #   $2  internal ID (jid.id) of the user whose avatars are wanted
  #   $3  output path stem, without extension
  # Prints the path of every copy made, one per line, in the order copied.
  archive_avatars() {
    local account_id="$1"
    local internal_id="$2"
    local output_stem="$3"
    local suffix=""
    local copied=""
    local source_path
    local output_path
    # A lone newline, for splitting and joining lists of paths.
    # (The "_" guards the newline from being stripped by the $(…).)
    local newline
    newline="$(printf '\n_')"
    newline="${newline%_}"
    local IFS="$newline"

    for source_path in $(avatar_paths "$account_id" "$internal_id"); do
      output_path="$output_stem$suffix.$(file_extension "$source_path")"
      cp "$source_path" "$output_path"
      copied="$copied$output_path$newline"
      # First avatar gets no number; the following ones get 1, 2, …
      suffix="$(( ${suffix:-0} + 1 ))"
    done

    # Report only after all copying is done, so that a reader that stops
    # early (e.g. `head -1`) cannot cut the loop short.
    printf '%s' "$copied"
  }
  # For flexibility in formatting, we let the user define the selection order
  # in a simplified manner: this turns the comma-delimited slot list $1
  # (DATE, JID, AVATAR, BODY) into the matching SQL column expressions, printed
  # as a single line.
  # Reads the globals $THEIR_AVATAR, $YOUR_AVATAR, $IMAGE_FORMAT and
  # $FILE_FORMAT.
  message_slots_to_selection() {
    local slots="$1"
    # Protects \, & and the ^ delimiter in text used as a sed replacement
    local sed_escape='s/[\\&^]/\\&/g'
    # Doubles single quotes in text placed inside an SQL string literal
    local sql_escape="s/'/''/g"
    local image_format file_format their_avatar your_avatar
    local date_part jid_part avatar_part body_part

    image_format="$(printf '%s' "$IMAGE_FORMAT" | sed "$sql_escape")"
    file_format="$(printf '%s' "$FILE_FORMAT" | sed "$sql_escape")"
    their_avatar="$(basename "$THEIR_AVATAR" | sed "$sql_escape")"
    your_avatar="$(basename "$YOUR_AVATAR" | sed "$sql_escape")"

    date_part="DATETIME(message.local_time, 'unixepoch', 'localtime')"
    jid_part="CASE message.direction
      WHEN 0
      THEN jid.bare_jid
      ELSE ( select account.bare_jid from account where account.id == message.account_id )
      END"
    avatar_part="CASE message.direction
      WHEN 0
      THEN 'files/$their_avatar'
      ELSE 'files/$your_avatar'
      END"
    # If this message has a file attached, print the file's relative path
    # Uses two separate output formats for files and for images
    body_part="
      CASE
      WHEN message.id == (
        SELECT file_transfer.info
        FROM file_transfer
        WHERE file_transfer.info == message.id )
      THEN ( SELECT
        CASE
        WHEN (file_transfer.path LIKE '%.jpg') OR (file_transfer.path LIKE '%.jpeg')
          OR (file_transfer.path LIKE '%.png') OR (file_transfer.path LIKE '%.webm') OR (file_transfer.path LIKE '%.svg')
        THEN PRINTF('$image_format', 'files/' || path)
        ELSE PRINTF('$file_format', 'files/' || path, 'files/' || path)
        END
        FROM file_transfer
        WHERE file_transfer.info == message.id )
      ELSE message.body
      END"

    # Flatten each part onto one line, then make it safe as a sed replacement:
    # a format such as '<b>&amp;</b>' must not have its "&" read by sed as
    # "the matched text".
    jid_part="$(printf '%s\n' "$jid_part" | tr '\n' ' ' | tr -d '\t' | sed "$sed_escape")"
    avatar_part="$(printf '%s\n' "$avatar_part" | tr '\n' ' ' | tr -d '\t' | sed "$sed_escape")"
    body_part="$(printf '%s\n' "$body_part" | tr '\n' ' ' | tr -d '\t' | sed "$sed_escape")"

    # The expressions are applied in this order; BODY goes last so that text
    # from the user's formats is never scanned for slot names.
    echo "$slots" \
      | sed -e "s^DATE^$date_part^g" \
            -e "s^JID^$jid_part^g" \
            -e "s^AVATAR^$avatar_part^g" \
            -e "s^BODY^$body_part^g"
  }
  # Prints a header/footer for message output, replacing useful variables:
  # YOUR_JID, YOUR_NICK, THEIR_JID and THEIR_NICK in the template $3 are
  # substituted with the respective addresses and nicknames.
  #   $1  internal ID of the account
  #   $2  internal ID of the conversation partner
  #   $3  the template text
  output_message_cap() {
    local account_id="$1"
    local partner_id="$2"
    local message_cap="$3"
    # Protects \, & and the % delimiter in text used as a sed replacement;
    # otherwise a nickname like "Tom & Jerry" or "100%" corrupts the output.
    local sed_escape='s/[\\&%]/\\&/g'
    local your_info their_info
    local your_jid your_nick their_jid their_nick

    # One lookup per side; the first line is the address, the last the nick
    your_info="$(account_jid_and_nick "$account_id")"
    their_info="$(id_jid_and_nick "$partner_id")"
    your_jid="$(printf '%s\n' "$your_info" | head -1 | sed "$sed_escape")"
    your_nick="$(printf '%s\n' "$your_info" | tail -1 | sed "$sed_escape")"
    their_jid="$(printf '%s\n' "$their_info" | head -1 | sed "$sed_escape")"
    their_nick="$(printf '%s\n' "$their_info" | tail -1 | sed "$sed_escape")"

    echo "$message_cap" \
      | sed -e "s%YOUR_JID%$your_jid%g" \
            -e "s%YOUR_NICK%$your_nick%g" \
            -e "s%THEIR_JID%$their_jid%g" \
            -e "s%THEIR_NICK%$their_nick%g"
  }
  # Outputs all conversation's text with partner, as per $MESSAGE_FORMAT:
  # first $MESSAGE_HEADER, then one formatted row per message in ascending
  # order of local time, then $MESSAGE_FOOTER.
  #   $1  internal ID of the account
  #   $2  internal ID of the conversation partner
  #   $3  optional; accepted but not read by this function
  output_messages_with_partner() {
    local account_id="$1"
    local partner_id="$2"
    local output_dir="$3"
    local message_format
    local selection

    # The format lands inside an SQL string literal, so double any single
    # quote it contains.
    message_format="$(printf '%s' "$MESSAGE_FORMAT" | sed "s/'/''/g")"
    selection="$(message_slots_to_selection "$MESSAGE_SLOTS")"

    output_message_cap "$account_id" "$partner_id" "$MESSAGE_HEADER"
    # PRINTF() rather than FORMAT(): SQLite documents printf() as the original
    # name of format(), kept as an alias, and the message-body expressions in
    # this script already use PRINTF().
    sqlite "$DB_FILE" \
      "SELECT PRINTF('$message_format',
         $selection)
       FROM jid,message
       WHERE message.account_id == '$account_id'
       AND message.counterpart_id == $partner_id
       AND jid.id == $partner_id
       ORDER BY message.local_time ASC;"
    output_message_cap "$account_id" "$partner_id" "$MESSAGE_FOOTER"
  }
  # Prints, one per line, those of a user's potential avatar paths that exist
  # on disk.
  #   $1  internal ID of the account
  #   $2  internal ID (jid.id) of the user
  avatar_paths() {
    local account_id="$1"
    local internal_id="$2"
    # Split the candidate list on line breaks only.
    # (The "_" guards the newline from being stripped by the $(…).)
    local IFS
    IFS="$(printf '\n_')"
    IFS="${IFS%_}"

    for file in $(potential_avatar_paths "$account_id" "$internal_id" | uniq); do
      test ! -e "$file" || echo "$file"
    done
  }
  # Prints, one per line, the paths under "$DINO_HOME/avatars/" named after
  # each avatar hash recorded for a user. The files need not exist.
  #   $1  internal ID of the account
  #   $2  internal ID (jid.id) of the user
  potential_avatar_paths() {
    local account_id="$1"
    local internal_id="$2"
    local query="SELECT '$DINO_HOME/avatars/' || hash
      FROM contact_avatar
      WHERE jid_id == '$internal_id'
      AND account_id == '$account_id';"

    sqlite "$DB_FILE" "$query"
  }
  # USER ENVIRONMENT
  # ———————————————————————————————————————————————————————————————————————————————
  # Where Dino's data lives: $DINO_HOME if given, else "$XDG_DATA_HOME/dino/";
  # if that path doesn't exist, fall back on "~/.local/share/dino/".
  if test -z "$DINO_HOME"; then
    DINO_HOME="$XDG_DATA_HOME/dino/"
  fi
  if test ! -e "$DINO_HOME"; then
    DINO_HOME="$HOME/.local/share/dino/"
  fi
  # The database is read from the same directory as the files and avatars, so
  # a custom $DINO_HOME or an unset $XDG_DATA_HOME no longer points it elsewhere.
  DB_FILE="${DINO_HOME%/}/dino.db"
  # The format for message output, with each %s being substituted with its
  # corresponding place in $MESSAGE_SLOTS
  : "${MESSAGE_FORMAT:=%s <%s> %s}"

  # The slots used in $MESSAGE_FORMAT.
  # May be DATE, JID, BODY, or AVATAR. Must be comma-delimited.
  : "${MESSAGE_SLOTS:=DATE, JID, BODY}"

  # The formats for message bodies that carry an attached file or image;
  # %s stands for the file's relative path
  : "${FILE_FORMAT:=File uploaded: %s}"
  : "${IMAGE_FORMAT:=Image uploaded: %s}"
  # STATE
  # ———————————————————————————————————————————————————————————————————————————————
  # How repulsive… very sorry about this =w="
  # Global caches, all starting out empty:
  #   THEIR_INFO, THEIR_AVATAR            the current partner's address/nick and avatar path
  #   YOUR_INFO, YOUR_JID_ID, YOUR_AVATAR the current account's address/nick, jid.id and avatar path
  THEIR_INFO="" THEIR_AVATAR=""
  YOUR_INFO="" YOUR_JID_ID="" YOUR_AVATAR=""
  # INVOCATION
  # ———————————————————————————————————————————————————————————————————————————————
  # Prints the help text (invocation and the environment variables that tune
  # the export) to stdout, then exits the script with status 2.
  usage() {
    echo "usage: $(basename "$0") OUTPUT_DIRECTORY"
    echo
    echo "Exports all conversations and files from the Dino XMPP client into a plain-text format."
    echo
    echo ' $DINO_HOME'
    # The directory is spelled in lowercase, as that is what the script looks for
    echo ' Dino data directory (default: $XDG_DATA_HOME/dino or ~/.local/share/dino)'
    echo ' $MESSAGE_HEADER'
    echo ' Text preceding each message file, with basic substitutions. (e.g., "<html><body>…")'
    echo ' Substitutions are THEIR_JID, YOUR_JID, THEIR_NICK, and YOUR_NICK.'
    echo ' $MESSAGE_FOOTER'
    echo ' Likewise, but is output to the end of each message file. (e.g., "</body></html>")'
    echo ' $MESSAGE_FORMAT'
    echo ' Template for message output, in a printf style (e.g., "[%s] <%s>: %s")'
    echo ' $MESSAGE_SLOTS'
    echo ' Comma-delimited arguments for $MESSAGE_FORMAT (e.g., "DATE,JID,BODY")'
    echo ' Valid slots are AVATAR, BODY, DATE, and JID.'
    echo ' $IMAGE_FORMAT'
    echo ' Format for message-bodies containing an image. (e.g., "<img src="%s" />")'
    # Double-quoted so the quotes around %s are printed; a blank value selects
    # the default format rather than the bare path.
    echo " Set to '%s' to simply print the image path. (default: \"Image uploaded: %s\")"
    echo ' $FILE_FORMAT'
    echo ' Likewise, but for every other sort of attached file.'
    exit 2
  }
  # The sole argument names the directory to export into; without one, or
  # when help is asked for, show the usage text (which exits the script).
  OUTPUT="$1"
  case "$OUTPUT" in
    ""|"--help"|"-h") usage ;;
  esac
  # Export every conversation of every account, into
  # "$OUTPUT/<account>/<partner>/". A directory is named "nick (jid)" when a
  # nickname distinct from the address is known, or just "jid" otherwise.
  for account in $(account_list); do
    # Reset state (repopulated by account_jid_and_nick; account_jid_id; archive_files…)
    YOUR_INFO=""; YOUR_JID_ID=""; YOUR_AVATAR=""
    # Fill the account caches here, in the main shell: every later lookup
    # happens inside $(…), where an assignment to the cache would be lost.
    account_jid_and_nick "$account" > /dev/null
    account_jid_id "$account" > /dev/null

    jid="$(account_jid_and_nick "$account" | head -1)"
    # A "/" in a nickname would be taken as a path separator; swap it out
    nick="$(account_jid_and_nick "$account" | tail -1 | tr '/' '_')"
    account_output="$OUTPUT/$jid/"
    if test -n "$nick" && test "$nick" != "$jid"; then
      account_output="$OUTPUT/$nick ($jid)/"
    fi

    for partner in $(conversation_partners "$account"); do
      # Reset state (repopulated by id_jid_and_nick; archive_files_with…)
      THEIR_INFO=""; THEIR_AVATAR=""
      # Likewise, fill the partner cache in the main shell
      id_jid_and_nick "$partner" > /dev/null

      jid="$(id_jid_and_nick "$partner" | head -1)"
      nick="$(id_jid_and_nick "$partner" | tail -1 | tr '/' '_')"
      partner_output="$account_output/$jid/"
      if test -n "$nick" && test "$nick" != "$jid"; then
        partner_output="$account_output/$nick ($jid)/"
      fi

      echo "Archiving $jid…"
      archive_conversation_with_partner "$account" "$partner" "$partner_output"
    done
  done