fedi-archive.sh 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. #!/bin/sh
  2. #―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
  3. # Name: fedi-archive.sh
  4. # Desc: Downloads (most) posts from a fedi account in parseable format.
  5. # Reqs: jq, curl
  6. # Date: 2023-05-06
  7. # Auth: @jadedctrl@jam.xwx.moe
  8. #―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
  9. # Given a JSON file containing /api/v1/accounts/$user/statuses output,
  10. # output a post at the given index from the file like so:
  11. # FAVOURITE_COUNT DATE_POSTED POST_ID POST_URL
  12. # [spoiler: SPOILER_TEXT]
  13. # [media: URL DESCRIPTION]
  14. # [media: URL DESCRIPTION]
  15. # [CONTENT…]
  16. # [] meaning "optional". There might be an arbitrary amount of Media: lines.
  17. output_post_of_index() {
  18. local index="$1"
  19. local file="$2"
  20. if test "$(jq -r --arg INDEX "$index" '.[$INDEX|tonumber]' < "$file")" = "null"; then
  21. return 1
  22. else
  23. jq -r --arg INDEX "$index" \
  24. '.[$INDEX|tonumber] | "\(.favourites_count) \(.created_at) \(.id) \(.url)
  25. \("spoiler: " + .spoiler_text)
  26. \(if (.media_attachments | length) > 0 then .media_attachments[] | "media: " + .url + " " + .description else "" end)
  27. \(.content)"' \
  28. < "$file"
  29. fi
  30. }
  # Fetch one page of a user's statuses and write the JSON to stdout.
  # The query asks for limit=40 and excludes replies and reblogs.
  #
  # Arguments:
  #   $1 - server host name (the URL is built as https://$1/…)
  #   $2 - user, placed in /api/v1/accounts/$2/statuses
  #   $3 - optional max_id; when non-empty it is appended to the query so
  #        only messages older than said message are returned
  # Returns:
  #   curl's exit status.
  fetch_page() {
    local server="$1"
    local user="$2"
    local max_id="$3"
    local url="https://${server}/api/v1/accounts/${user}/statuses"

    url="${url}?exclude_replies=true&exclude_reblogs=true&limit=40"
    if test -n "$max_id"; then
      url="${url}&max_id=${max_id}"
    fi

    # -s drops the progress meter that curl prints to stderr whenever stdout
    # is redirected; -S keeps real error messages visible.
    curl -sS "$url"
  }
  # Split a saved statuses page into one file per post.
  #
  # Each post is written to "$prefix-$index" (index counting from 0) in the
  # format produced by output_post_of_index(); see its comment for details.
  # The name of every file being attempted is echoed to stderr as progress.
  #
  # Arguments:
  #   $1 - JSON file containing /api/v1/accounts/$user/statuses output
  #   $2 - prefix for the per-post file names
  # Outputs (stdout):
  #   The ID of the last post written (third field of its first line), or
  #   nothing when no post was written.
  archive_posts() {
    local json_file="$1"
    local prefix="$2"
    local post_file=""
    local last_post_file=""
    local i=0
    local output_ret=0

    while :; do
      post_file="${prefix}-${i}"
      echo "$post_file" 1>&2
      output_post_of_index "$i" "$json_file" \
        > "$post_file"
      output_ret="$?"

      if test -s "$post_file"; then
        last_post_file="$post_file"
      else
        # No output means there is no post at this index, whatever the exit
        # status says; stopping here keeps an empty download from spinning
        # forever.
        rm -f -- "$post_file"
        break
      fi

      test "$output_ret" -eq 0 || break
      i=$((i + 1))
    done

    # Only look for an ID when at least one post file was produced.
    if test -n "$last_post_file"; then
      awk 'NR == 1 { print $3; exit }' "$last_post_file"
    fi
  }
  # Fetch and archive every page of posts for the given user at the given
  # server.
  #
  # Pages are numbered from 1 and the page number is handed to
  # archive_posts() as the file-name prefix. The ID printed by
  # archive_posts() for one page becomes the max_id of the next request.
  #
  # Arguments:
  #   $1 - server
  #   $2 - username
  # Outputs (stdout):
  #   One "LAST_ID - PAGE…" progress line before each follow-up request.
  # Returns:
  #   0 when paging ran to the end; non-zero when the temporary file could
  #   not be created or fetch_page failed (its status is passed through).
  archive_all_posts() {
    local server="$1"
    local username="$2"
    local temp=""
    local page=1
    local next_id=""
    local previous_id=""
    local status=0

    # Declared separately above so a mktemp failure is not masked by `local`.
    temp="$(mktemp)" || return 1

    while :; do
      if ! fetch_page "$server" "$username" "$next_id" > "$temp"; then
        status=1
        # Re-run nothing: report failure without archiving a partial page.
        break
      fi

      previous_id="$next_id"
      next_id="$(archive_posts "$temp" "$page")"

      # Stop on an empty page, and also when the server hands back the same
      # last ID again, which would otherwise repeat the same request forever.
      if test -z "$next_id" || test "$next_id" = "$previous_id"; then
        break
      fi

      page=$((page + 1))
      echo "$next_id - $page…"
    done

    rm -f -- "$temp"
    return "$status"
  }
  # Print the help text to stderr and terminate the script.
  #
  # Outputs (stderr): the usage line followed by a short description.
  # Exits: always, with status 2.
  usage() {
    # Parameter expansion strips the directory part without word-splitting
    # a script path that contains spaces.
    local name="${0##*/}"

    printf '%s\n' \
      "usage: ${name} username server" \
      "" \
      "${name} is a script that fetches all of a user's Mastodon/Pleroma" \
      "posts for archival purposes." \
      "Mainly for use with fedi-post.sh or pleroma-migrate.sh." \
      1>&2
    exit 2
  }
  # Entry point: expects `username server` as the two positional arguments.
  USERNAME="$1"
  SERVER="$2"

  # An explicit help request is checked on its own; a single `test` call
  # chaining many `-o` operands is unspecified in POSIX and can misparse
  # arguments that look like operators.
  case "$1" in
    -h|--help) usage ;;
  esac

  # Both arguments are required.
  if test -z "$USERNAME" || test -z "$SERVER"; then
    usage
  fi

  archive_all_posts "$SERVER" "$USERNAME"