ansi2html.sh 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. #!/bin/sh
  2. # Convert ANSI (terminal) colours and attributes to HTML
  3. # Licence: LGPLv2
  4. # Author:
  5. # http://www.pixelbeat.org/docs/terminal_colours/
  6. # Examples:
  7. # ls -l --color=always | ansi2html.sh > ls.html
  8. # git show --color | ansi2html.sh > last_change.html
  9. # Generally one can use the `script` util to capture full terminal output.
  10. # Changes:
  11. # V0.1, 24 Apr 2008, Initial release
  12. # V0.2, 01 Jan 2009, Phil Harnish <philharnish@gmail.com>
  13. # Support `git diff --color` output by
  14. # matching ANSI codes that specify only
  15. # bold or background colour.
  16. # P@draigBrady.com
  17. # Support `ls --color` output by stripping
  18. # redundant leading 0s from ANSI codes.
  19. # Support `grep --color=always` by stripping
  20. # unhandled ANSI codes (specifically ^[[K).
  21. # V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/
  22. # Remove cat -v usage which mangled non ascii input.
  23. # Cleanup regular expressions used.
  24. # Support other attributes like reverse, ...
  25. # P@draigBrady.com
  26. # Correctly nest <span> tags (even across lines).
  27. # Add a command line option to use a dark background.
  28. # Strip more terminal control codes.
  29. # V0.4, 17 Sep 2009, P@draigBrady.com
  30. # Handle codes with combined attributes and color.
  31. # Handle isolated <bold> attributes with css.
  32. # Strip more terminal control codes.
  33. # V0.19, 09 May 2014
  34. # http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh
  35. if [ "$1" = "--version" ]; then
  36. printf '0.19\n' && exit
  37. fi
  38. if [ "$1" = "--help" ]; then
  39. printf '%s\n' \
  40. 'This utility converts ANSI codes in data passed to stdin
  41. It has 2 optional parameters:
  42. --bg=dark --palette=linux|solarized|tango|xterm
  43. E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html' >&2
  44. exit
  45. fi
  46. [ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; }
  47. if [ "$1" = "--palette=solarized" ]; then
  48. # See http://ethanschoonover.com/solarized
  49. P0=073642; P1=D30102; P2=859900; P3=B58900;
  50. P4=268BD2; P5=D33682; P6=2AA198; P7=EEE8D5;
  51. P8=002B36; P9=CB4B16; P10=586E75; P11=657B83;
  52. P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3;
  53. shift;
  54. elif [ "$1" = "--palette=solarized-xterm" ]; then
  55. # Above mapped onto the xterm 256 color palette
  56. P0=262626; P1=AF0000; P2=5F8700; P3=AF8700;
  57. P4=0087FF; P5=AF005F; P6=00AFAF; P7=E4E4E4;
  58. P8=1C1C1C; P9=D75F00; P10=585858; P11=626262;
  59. P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7;
  60. shift;
  61. elif [ "$1" = "--palette=tango" ]; then
  62. # Gnome default
  63. P0=000000; P1=CC0000; P2=4E9A06; P3=C4A000;
  64. P4=3465A4; P5=75507B; P6=06989A; P7=D3D7CF;
  65. P8=555753; P9=EF2929; P10=8AE234; P11=FCE94F;
  66. P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC;
  67. shift;
  68. elif [ "$1" = "--palette=xterm" ]; then
  69. P0=000000; P1=CD0000; P2=00CD00; P3=CDCD00;
  70. P4=0000EE; P5=CD00CD; P6=00CDCD; P7=E5E5E5;
  71. P8=7F7F7F; P9=FF0000; P10=00FF00; P11=FFFF00;
  72. P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF;
  73. shift;
  74. else # linux console
  75. P0=000000; P1=AA0000; P2=00AA00; P3=AA5500;
  76. P4=0000AA; P5=AA00AA; P6=00AAAA; P7=AAAAAA;
  77. P8=555555; P9=FF5555; P10=55FF55; P11=FFFF55;
  78. P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF;
  79. [ "$1" = "--palette=linux" ] && shift
  80. fi
  81. [ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; }
  82. # Mac OSX's GNU sed is installed as gsed
  83. # use e.g. homebrew 'gnu-sed' to get it
  84. if ! sed --version >/dev/null 2>&1; then
  85. if gsed --version >/dev/null 2>&1; then
  86. alias sed=gsed
  87. else
  88. echo "Error, can't find an acceptable GNU sed." >&2
  89. exit 1
  90. fi
  91. fi
  92. printf '%s' "<html>
  93. <head>
  94. <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>
  95. <style type=\"text/css\">
  96. .ef0,.f0 { color: #$P0; } .eb0,.b0 { background-color: #$P0; }
  97. .ef1,.f1 { color: #$P1; } .eb1,.b1 { background-color: #$P1; }
  98. .ef2,.f2 { color: #$P2; } .eb2,.b2 { background-color: #$P2; }
  99. .ef3,.f3 { color: #$P3; } .eb3,.b3 { background-color: #$P3; }
  100. .ef4,.f4 { color: #$P4; } .eb4,.b4 { background-color: #$P4; }
  101. .ef5,.f5 { color: #$P5; } .eb5,.b5 { background-color: #$P5; }
  102. .ef6,.f6 { color: #$P6; } .eb6,.b6 { background-color: #$P6; }
  103. .ef7,.f7 { color: #$P7; } .eb7,.b7 { background-color: #$P7; }
  104. .ef8, .f0 > .bold,.bold > .f0 { color: #$P8; font-weight: normal; }
  105. .ef9, .f1 > .bold,.bold > .f1 { color: #$P9; font-weight: normal; }
  106. .ef10,.f2 > .bold,.bold > .f2 { color: #$P10; font-weight: normal; }
  107. .ef11,.f3 > .bold,.bold > .f3 { color: #$P11; font-weight: normal; }
  108. .ef12,.f4 > .bold,.bold > .f4 { color: #$P12; font-weight: normal; }
  109. .ef13,.f5 > .bold,.bold > .f5 { color: #$P13; font-weight: normal; }
  110. .ef14,.f6 > .bold,.bold > .f6 { color: #$P14; font-weight: normal; }
  111. .ef15,.f7 > .bold,.bold > .f7 { color: #$P15; font-weight: normal; }
  112. .eb8 { background-color: #$P8; }
  113. .eb9 { background-color: #$P9; }
  114. .eb10 { background-color: #$P10; }
  115. .eb11 { background-color: #$P11; }
  116. .eb12 { background-color: #$P12; }
  117. .eb13 { background-color: #$P13; }
  118. .eb14 { background-color: #$P14; }
  119. .eb15 { background-color: #$P15; }
  120. "
  121. # The default xterm 256 colour palette
  122. for red in 0 1 2 3 4 5 ; do
  123. for green in 0 1 2 3 4 5 ; do
  124. for blue in 0 1 2 3 4 5 ; do
  125. c=$((16 + ($red * 36) + ($green * 6) + $blue))
  126. r=$((($red * 40 + 55) * ($red > 0)))
  127. g=$((($green * 40 + 55) * ($green > 0)))
  128. b=$((($blue * 40 + 55) * ($blue > 0)))
  129. printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $r $g $b
  130. printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $r $g $b
  131. done
  132. done
  133. done
  134. for gray in $(seq 0 23); do
  135. c=$(($gray+232))
  136. l=$(($gray*10 + 8))
  137. printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $l $l $l
  138. printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $l $l $l
  139. done
  140. printf '%s' '
  141. .f9 { color: '`[ "$dark_bg" ] && printf "#$P7;" || printf "#$P0;"`' }
  142. .b9 { background-color: #'`[ "$dark_bg" ] && printf $P0 || printf $P15`'; }
  143. .f9 > .bold,.bold > .f9, body.f9 > pre > .bold {
  144. /* Bold is heavy black on white, or bright white
  145. depending on the default background */
  146. color: '`[ "$dark_bg" ] && printf "#$P15;" || printf "#$P0;"`'
  147. font-weight: '`[ "$dark_bg" ] && printf 'normal;' || printf 'bold;'`'
  148. }
  149. .reverse {
  150. /* CSS doesnt support swapping fg and bg colours unfortunately,
  151. so just hardcode something that will look OK on all backgrounds. */
  152. '"color: #$P0; background-color: #$P7;"'
  153. }
  154. .underline { text-decoration: underline; }
  155. .line-through { text-decoration: line-through; }
  156. .blink { text-decoration: blink; }
  157. </style>
  158. </head>
  159. <body class="f9 b9">
  160. <pre>
  161. '
# p holds the sed regex text for the two-character CSI introducer (ESC
# followed by a literal "["); it is interpolated as ${p} into the
# double-quoted sed scripts of this pipeline.
p='\x1b\[' #shortcut to match escape codes
# Stage 1 of the pipeline (reads the raw input on stdin).
# Handle various xterm control sequences.
# See /usr/share/doc/xterm-*/ctlseqs.txt
# In order, this stage:
#  - escapes "&" as "&amp;"
#  - strips ESC ... ESC-backslash sequences, OSC sequences ended by BEL,
#    a trailing carriage return, BEL characters and a few other
#    non-SGR escapes
#  - rewrites any literal lambda character in the input to "&lambda;" so
#    that the lambda character can then be used as an in-band marker
#  - rewrites cursor positioning (G, H), clear (H, K), cursor move (C, D)
#    and delete-character (P) sequences into lambda-prefixed markers
#    (J, C, M, X followed by ";"-terminated numbers) consumed by the awk stage
#  - strips every remaining CSI sequence whose final character is not "m",
#    leaving only SGR codes for the next stage.
sed "
# escape ampersand
s#&#\&amp;#g;
s#\x1b[^\x1b]*\x1b\\\##g # strip anything between \e and ST
s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.)
s#\r\$## # strip trailing \r
# strip other non SGR escape sequences
s#[\x07]##g
s#\x1b[]>=\][0-9;]*##g
s#\x1bP+.\{5\}##g
s:λ:\&lambda;:g
# Mark cursor positioning codes <LA>Jr;c;
s#${p}\([0-9]\{1,2\}\)G#λJ;\1;#g
s#${p}\([0-9]\{1,2\}\);\([0-9]\{1,2\}\)H#λJ\1;\2;#g
# Mark clear as <LA>n where n=1 is screen and n=0 is to end-of-line
s#${p}H#λC1;#g
s#${p}K#λC0;#g
# Mark Cursor move columns as <LA>Mn where n is +ve for right, -ve for left
s#${p}C#λM1;#g
s#${p}\([0-9]\{1,\}\)C#λM\1;#g
s#${p}\([0-9]\{1,\}\)D#λM-\1;#g
s#${p}\([0-9]\{1,\}\)P#λX\1;#g
s#${p}[0-9;?]*[^0-9;?m]##g
" |
# Stage 2: Normalize the input before transformation
# In order, this stage:
#  - escapes ">", "<" and the double quote as HTML entities
#  - splits 256-colour codes (38;5;N / 48;5;N) out of combined SGR
#    sequences and tags them with a temporary marker character so the
#    generic splitter leaves them intact
#  - splits every remaining combined SGR sequence into one sequence per
#    code (the ":e"/":c" labels with "t" loop until nothing changes)
#  - strips a leading 0 from codes, moves a bold code after any directly
#    following 4/5/7/9 attribute codes, and turns the empty code into 0
#  - removes the temporary 256-colour marker again
#  - maps the bright colour codes 9x / 10x to 3x / 4x followed by bold
#  - replaces the reset code (0) with the lambda "R;" marker handled by
#    the awk stage.
sed "
# escape HTML (ampersand done above)
s#>#\&gt;#g; s#<#\&lt;#g; s#\"#\&quot;#g
# normalize SGR codes a little
# split 256 colors out and mark so that they're not
# recognised by the following 'split combined' line
:e
s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e
s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g;
:c
s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c # split combined
s#${p}0\([0-7]\)#${p}\1#g #strip leading 0
s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g #bold last (with clr)
s#${p}m#${p}0m#g #add leading 0 to norm
# undo any 256 color marking
s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g;
# map 16 color codes to color + bold
s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g;
s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g;
# change 'reset' code to <LA>R
s#${p}0m#λR;#g
" |
# Stage 3: Convert SGR sequences to HTML
# Only opening <span class="..."> tags are produced here; no closing tags
# are written by this stage. In order, it:
#  - collapses runs of consecutive basic foreground (3x) codes, and
#    likewise background (4x) codes, keeping only the last of each run
#  - turns an adjacent foreground+background pair (either order) into a
#    single span carrying both the fN and bN classes
#  - maps codes 1, 4, 5, 7, 9 to the bold, underline, blink, reverse and
#    line-through classes
#  - maps remaining 3N / 4N codes to fN / bN and 38;5;N / 48;5;N to
#    efN / ebN
#  - strips any SGR sequence still left over.
sed "
# common combinations to minimise html (optional)
:f
s#${p}3[0-7]m${p}3\([0-7]\)m#${p}3\1m#g; t f
:b
s#${p}4[0-7]m${p}4\([0-7]\)m#${p}4\1m#g; t b
s#${p}3\([0-7]\)m${p}4\([0-7]\)m#<span class=\"f\1 b\2\">#g
s#${p}4\([0-7]\)m${p}3\([0-7]\)m#<span class=\"f\2 b\1\">#g
s#${p}1m#<span class=\"bold\">#g
s#${p}4m#<span class=\"underline\">#g
s#${p}5m#<span class=\"blink\">#g
s#${p}7m#<span class=\"reverse\">#g
s#${p}9m#<span class=\"line-through\">#g
s#${p}3\([0-9]\)m#<span class=\"f\1\">#g
s#${p}4\([0-9]\)m#<span class=\"b\1\">#g
s#${p}38;5;\([0-9]\{1,3\}\)m#<span class=\"ef\1\">#g
s#${p}48;5;\([0-9]\{1,3\}\)m#<span class=\"eb\1\">#g
s#${p}[0-9;]*m##g # strip unhandled codes
" |
# Stage 4: Convert alternative character set and handle cursor movement codes
# Note we convert here, as if we do at start we have to worry about avoiding
# conversion of SGR codes etc., whereas doing here we only have to
# avoid conversions of stuff between &...; or <...>
#
# Note we could use sed to do this based around:
# sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/'
# However that would be very awkward as we need to only convert some input.
# The basic scheme that we do in the awk script below is:
# 1. enable transliterate once λT1; is seen
# 2. disable once λT0; is seen (may be on a different line)
# 3. never transliterate between &; or <> chars
# 4. track x,y movements and active display mode at each position
# 5. buffer line/screen and dump when required
#
# This sed stage only rewrites the charset switches into markers:
# ESC ( 0 and the 0x0E byte become λT1; while ESC ( B and the 0x0F byte
# become λT0;. The transliteration itself happens in the awk stage.
sed "
# change 'smacs' and 'rmacs' to a single char so that we can easily do
# negative matching, without using look-behind expressions etc.
s#\x1b(0#λT1;#g;
s#\x0E#λT1;#g;
s#\x1b(B#λT0;#g
s#\x0F#λT0;#g
" |
# Final pipeline stage: an awk program, run inside a subshell, that
# receives the marker-annotated text produced by the sed stages.
(
awk '
# dump_line(l, del): build and return the text of buffered row l.
#   l    row number to render
#   del  when true, each dump/attr entry is deleted once it has been used
#   c, blanks, ret are local variables (extra parameters).
# Reads the globals dump (characters keyed by column,row), attr (span
# string keyed by column,row), cur (array of spans currently open in the
# output) and maxX. Columns 1 .. maxX-1 are visited.
function dump_line(l,del,c,blanks,ret) {
for(c=1;c<maxX;c++) {
# Emit span open/close tags where this cell has attributes, or where
# spans are still open in cur; pending blanks are flushed first.
# Note that reading attr[c,l] creates the element when it is missing.
if ((c SUBSEP l) in attr || length(cur)) {
ret = ret blanks fixas(cur,attr[c,l])
if(del) delete attr[c,l]
blanks=""
}
# Emit the stored character, or remember one blank for an empty cell.
# Blanks are only written out if something else follows them.
if ((c SUBSEP l) in dump) {
ret=ret blanks dump[c,l]
if(del) delete dump[c,l]
blanks=""
} else blanks=blanks " "
}
# Pending blanks are kept only while spans are still open in cur.
if(length(cur)) ret=ret blanks
return ret
}
  272. function dump_screen(l,ret) {
  273. for(l=1;l<=maxY;l++)
  274. ret=ret dump_line(l,0) "\n"
  275. return ret fixas(cur, "")
  276. }
  277. function atos(a,i,ret) {
  278. for(i=1;i<=length(a);i++) if(i in a) ret=ret a[i]
  279. return ret
  280. }
  281. function fixas(a,s,spc,i,attr,rm,ret) {
  282. spc=length(a)
  283. l=split(s,attr,">")
  284. for(i=1;i<=spc;i++) {
  285. rm=rm?rm:(a[i]!=attr[i]">")
  286. if(rm) {
  287. ret=ret "</span>"
  288. delete a[i];
  289. }
  290. }
  291. for(i=1;i<l;i++) {
  292. attr[i]=attr[i]">"
  293. if(a[i]!=attr[i]) {
  294. a[i]=attr[i]
  295. ret = ret attr[i]
  296. }
  297. }
  298. return ret
  299. }
# encode(string, start, end): process one input line and return the
# text that should be output for it.
#   string      the line, carrying the markers written by the sed stages
#   start, end  character range in which transliteration may be applied;
#               both default to the whole line when not given
#   i, ret, pos, sc are local variables; buf is declared but not used.
# The line is scanned one character at a time. Printable cells are
# stored in dump[x,y] together with the active spans in attr[x,y], and
# the cursor globals x and y are advanced. Other globals used: state,
# last_mode, c, cc, cord, nx, span, spc, dumpStatus, maxX, maxY, Trans.
# Values of state as used below:
#   2  collecting an HTML entity (from "&" up to ";") into one cell
#   3  normal text
#   4  transliterating through the Trans table
# While dumpStatus is dsOff (0) the finished line is returned directly
# via dump_line; otherwise lines are buffered as a screen and only the
# screens flushed by dump_screen are returned.
function encode(string,start,end,i,ret,pos,sc,buf) {
if(!end) end=length(string);
if(!start) start=1;
# state is set back to 3 at the start of every line, whereas last_mode
# keeps its value between lines.
# NOTE(review): this looks like it ends transliteration at each line
# break, while the pipeline comments mention the off switch arriving
# on a different line - confirm the intent.
state=3
for(i=1;i<=length(string);i++) {
c=substr(string,i,1)
if(state==2) {
# Inside an entity: keep collecting until the closing semicolon, then
# treat the whole entity as the single character c.
sc=sc c
if(c==";") {
c=sc
state=last_mode
} else continue
} else {
# Carriage return: back to the first column.
if(c=="\r") { x=1; continue }
if(c=="<") {
# Change attributes - store current active
# attributes in span array
# cord[1] is the tag text without its closing ">"; i is moved onto
# that ">" so that the loop increment steps past the tag.
split(substr(string,i),cord,">");
i+=length(cord[1])
span[++spc]=cord[1] ">"
continue
}
else if(c=="&") {
# Everything goes to a single position until we see a semicolon
sc=c
state=2
continue
}
else if(c=="\b") {
# backspace move insertion point back 1
if(spc) attr[x,y]=atos(span)
x=x>1?x-1:1
continue
}
else if(c=="λ") {
# Marker: lambda, one command letter (cc), then ";"-terminated
# arguments which are split into cord[].
split(substr(string,i+2),cord,";")
cc=substr(string,i+1,1);
if(cc=="T") {
# Transliterate on/off
if(cord[1]==1&&state==3) last_mode=state=4
if(cord[1]==0&&state==4) last_mode=state=3
}
else if(cc=="C") {
# Clear
if(cord[1]+0) {
# Screen - if Recording dump screen
if(dumpStatus==dsActive) ret=ret dump_screen()
dumpStatus=dsActive
delete dump
delete attr
x=y=1
} else {
# To end of line
for(pos=x;pos<maxX;pos++) {
dump[pos,y]=" "
if (!spc) delete attr[pos,y]
else attr[pos,y]=atos(span)
}
}
}
else if(cc=="J") {
# Jump to x,y
# cord[1] is the row (empty for a column-only jump), cord[2] the
# column. Skip the second argument here; the first is skipped by
# the common index adjustment after this if/else chain.
i+=length(cord[2])+1
# If the target row is above the current one - dump previous screen
if(dumpStatus==dsActive&&cord[1]<y) {
ret=ret dump_screen();
dumpStatus=dsNew;
}
x=cord[2]
if(length(cord[1]) && y!=cord[1]){
y=cord[1]
if(y>maxY) maxY=y
# Change y - start recording
dumpStatus=dumpStatus?dumpStatus:dsReset
}
}
else if(cc=="M") {
# Move left/right on current line
x+=cord[1]
}
else if(cc=="X") {
# delete on right
# Shift the cells to the right of x left by cord[1] columns; cells
# whose source lies at or beyond maxX are filled with a blank
# carrying the current spans, but only when spans are active.
for(pos=x;pos<=maxX;pos++) {
nx=pos+cord[1]
if(nx<maxX) {
if((nx SUBSEP y) in attr) attr[pos,y] = attr[nx,y]
else delete attr[pos,y]
if((nx SUBSEP y) in dump) dump[pos,y] = dump[nx,y]
else delete dump[pos,y]
} else if(spc) {
attr[pos,y]=atos(span)
dump[pos,y]=" "
}
}
}
else if(cc=="R") {
# Reset attributes
while(spc) delete span[spc--]
}
# Move i onto the semicolon ending the first argument; the loop
# increment then steps past the marker.
i+=length(cord[1])+2
continue
}
else if(state==4&&i>=start&&i<=end&&c in Trans) c=Trans[c]
}
# From here on c is something to place at the cursor position.
if(dumpStatus==dsReset) {
delete dump
delete attr
ret=ret"\n"
dumpStatus=dsActive
}
if(dumpStatus==dsNew) {
# After moving/clearing we are now ready to write
# something to the screen so start recording now
ret=ret"\n"
dumpStatus=dsActive
}
if(dumpStatus==dsActive||dumpStatus==dsOff) {
dump[x,y] = c
if(!spc) delete attr[x,y]
else attr[x,y] = atos(span)
# maxX grows whenever the cursor moves past it.
if(++x>maxX) maxX=x;
}
}
# End of line: set x back to the first column; when not buffering a
# screen return the rendered line, otherwise move on to the next row
x=1
if(!dumpStatus) return ret dump_line(y,1);
else if(++y>maxY) maxY=y;
return ret
}
  429. BEGIN{
  430. OFS=FS
  431. # dump screen status
  432. dsOff=0 # Not dumping screen contents just write output direct
  433. dsNew=1 # Just after move/clear waiting for activity to start recording
  434. dsReset=2 # Screen cleared build new empty buffer and record
  435. dsActive=3 # Currently recording
  436. F="abcdefghijklmnopqrstuvwxyz{}`~"
  437. T="▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·"
  438. maxX=80
  439. delete cur;
  440. x=y=1
  441. for(i=1;i<=length(F);i++)Trans[substr(F,i,1)]=substr(T,i,1);
  442. }
  443. { $0=encode($0) }
  444. 1
  445. END {
  446. if(dumpStatus) {
  447. print dump_screen();
  448. }
  449. }'
  450. # sed -e 's/[ĢΩ¡µ]//g' -e 's/β[^;]*;[^;]*;//g' # just strip aternative flag chars
  451. )
  452. printf '</pre>
  453. </body>
  454. </html>\n'