#!/bin/bash
# Best effort auto-pygmentization with transparent decompression
# by Reuben Thomas 2008-2022
# This program is in the public domain.

# Strategy: first ask file for the MIME type; if that only says text/plain, let pygmentize guess a lexer from the file name; if neither finds one, fail
# Set the environment variable PYGMENTIZE_OPTS or pass options before the file path to configure pygments.

# This program can be used as a .lessfilter for the less pager to auto-color less's output

# Split the command line into the file to display and the pygmentize options.
# Arguments: "$@" - optional pygmentize options followed by the file path
# Globals:   file (written)    - the last argument, or empty if there is none
#            options (written) - all other arguments, joined with spaces
split_args() {
    if (( $# > 0 )); then
        file="${!#}"                         # last argument
        options="${*:1:$(( $# - 1 ))}"       # other args are passed to pygmentize
    else
        # Without arguments ${!#} would expand to $0 (this script itself) and
        # the slice length would be -1, which bash rejects. Leave both empty
        # instead; the later stages then fail on the empty path.
        file=""
        options=""
    fi
}
split_args "$@"

# Options given to every invocation of file: print only the result (no file
# name prefix) and follow symlinks.
file_common_opts="--brief --dereference"

# Print the name of the pygments lexer to use for a file.
# Arguments: $1 - path of the file to inspect
# Globals:   file_common_opts (read) - extra options passed to file
# Outputs:   the lexer name on stdout (an empty line if none was found)
detect_lexer() {
    local path=$1
    local name=""

    # $file_common_opts is deliberately unquoted: it holds several options.
    case $(file --mime-type --uncompress $file_common_opts "$path") in
        application/xml|image/svg+xml) name=xml;;
        application/javascript) name=javascript;;
        application/json) name=json;;
        text/html) name=html;;
        text/troff) name=nroff;;
        text/x-asm) name=nasm;;
        text/x-awk) name=awk;;
        text/x-c) name=c;;
        text/x-c++) name=cpp;;
        text/x-clojure) name=clojure;;
        text/x-crystal) name=crystal;;
        text/x-diff) name=diff;;
        text/x-execline) name=execline;;
        text/x-forth) name=forth;;
        text/x-fortran) name=fortran;;
        text/x-gawk) name=gawk;;
        text/x-java) name=java;;
        text/x-lisp) name=common-lisp;;
        text/x-lua|text/x-luatex) name=lua;;
        text/x-makefile) name=make;;
        text/x-msdos-batch) name=bat;;
        text/x-nawk) name=nawk;;
        text/x-objective-c) name=objective-c;;
        text/x-pascal) name=pascal;;
        text/x-perl) name=perl;;
        text/x-php) name=php;;
        text/x-po) name=po;;
        text/x-python) name=python;;
        text/x-ruby) name=ruby;;
        text/x-script.python) name=python;;
        text/x-shellscript) name=sh;;
        text/x-tcl) name=tcl;;
        text/x-tex|text/x-texinfo) name=latex;; # FIXME: texinfo really needs its own lexer
        text/xml) name=xml;;
        text/vnd.graphviz) name=graphviz;;

        # Types that file outputs which pygmentize didn't support as of file 5.41, pygments 2.11.2
        # text/binary
        # text/calendar
        # text/PGP
        # text/prs.lines.tag
        # text/rtf
        # text/spreadsheet
        # text/texmacs
        # text/vcard
        # text/vnd.sosi
        # text/x-Algol68
        # text/x-bcpl
        # text/x-dmtf-mif
        # text/x-gimp-curve
        # text/x-gimp-ggr
        # text/x-gimp-gpl
        # text/x-info
        # text/x-installshield-lid
        # text/x-m4
        # text/x-modulefile
        # text/x-ms-adm
        # text/x-ms-cpx
        # text/x-ms-regedit
        # text/x-ms-tag
        # text/x-systemtap
        # text/x-vcard
        # text/x-wine-extension-reg
        # text/x-xmcd

        text/plain)  # special filenames. TODO: insert more
            case $(basename "$path") in
                .zshrc) name=sh;;
            esac
            # Only fall back to pygmentize when no special filename matched;
            # otherwise its guess would overwrite the choice made above.
            # pygmentize -N is much cheaper than file, but makes some bad guesses (e.g.
            # it guesses ".pl" is Prolog, not Perl)
            if [[ -z "$name" ]]; then
                name=$(pygmentize -N "$path")
            fi
            ;;
    esac

    printf '%s\n' "$name"
}
lexer=$(detect_lexer "$file")

# Find a concatenator for compressed files
# Arguments: $1 - path of the file to inspect
# Globals:   file_common_opts (read) - extra options passed to file
#            concat (written)        - command that writes the file's
#                                      (decompressed) contents to stdout
#            concat_opts (written)   - options to pass to that command
# Outputs:   nothing on stdout
select_concat() {
    concat=
    concat_opts=
    # No --uncompress here: the type of the container itself is wanted.
    case $(file $file_common_opts --mime-type "$1") in
        # TODO: add support
        # application/x-rzip (does not decompress to stdout)
        # application/x-dzip (Windows only)
        application/gzip|application/x-gzip)  concat=zcat;;
        application/x-bzip)                   concat=bzip; concat_opts=-dc;;
        application/x-bzip2)                  concat=bzcat;;
        application/x-lz4)                    concat=lz4; concat_opts=-dc;;
        application/x-lzh-compressed)         concat=p7zip; concat_opts=-dc;;
        application/x-lzma)                   concat=lzcat;;
        application/x-lzip)                   concat=lzip; concat_opts=-dc;;
        application/x-xz)                     concat=xzcat;;
        application/x-zoo)                    concat=zoo; concat_opts=fu;;
    esac
    # If concat is unset or doesn't exist, use cat instead.
    # command -v prints the resolved command on stdout; discard it so that it
    # does not end up in front of the highlighted output.
    if [[ -z "$concat" ]] || ! command -v "$concat" >/dev/null 2>&1; then
        concat=cat
        concat_opts=
    fi
}
select_concat "$file"

# Choose the command that renders the (decompressed) data: plain cat for the
# "text" lexer, pygmentize for everything else. A prereader (a hex dump for
# binary files, or fmt for text with very long lines) may precede it, but both
# of those are currently disabled, so prereader stays empty.
prereader=""
# Character encoding of the decompressed contents, passed to pygmentize below.
encoding=$(file --mime-encoding --uncompress $file_common_opts "$file")
# FIXME: need a way to switch between hex and text view, as file often
# misdiagnoses files when they contain a few control characters
# if [[ $encoding == "binary" ]]; then
#     prereader="od -x" # POSIX fallback
#     if [[ -n $(which hd) ]]; then
#         prereader=hd # preferred
#     fi
#     lexer=hexdump
#     encoding=latin1
#el
# FIXME: Using fmt does not work well for system logs
# if [[ "$lexer" == "text" ]]; then
#    if file "$file" | grep -ql "text, with very long lines"; then
#        reader=fmt
#    fi
# fi
case "$lexer" in
    text)
        # Nothing to highlight: pass the data through unchanged.
        reader=cat
        ;;
    *)
        # Kept as a single string; it is word-split when the reader is run.
        reader="pygmentize -O inencoding=$encoding $PYGMENTIZE_OPTS $options -l $lexer"
        ;;
esac

# Run the reader
# Writes the file's contents with the concatenator, optionally filters them
# through the prereader, and hands the result to the reader.
# Globals:   concat, concat_opts, file, prereader, reader (all read)
# Outputs:   the reader's output on stdout
# Returns:   the exit status of the reader (last stage of the pipeline)
run_pipeline() {
    # concat_opts, prereader and reader are deliberately unquoted: they hold a
    # command plus its options and must be word-split. concat_opts must be
    # passed here, because several decompressors (lz4, lzip, ...) only write
    # to stdout when given their options.
    if [[ -n "$prereader" ]]; then
        $concat $concat_opts "$file" | $prereader | $reader
    else
        $concat $concat_opts "$file" | $reader
    fi
}
run_pipeline
