#!/bin/bash --
#
# Shell script to query the Wikipedia.
#
# It can be used to output Wikipedia articles to the console, but can also
# just open the article in any browser.
#
# Author: Christian Brabandt <cb@256bit.org>
# License: BSD
VERSION=0.14

set -e

function display_help(){ #{{{1
  # Print the usage text to standard output.
  #
  # The script name shown in the text is derived from $0 with a parameter
  # expansion, so it is also correct when the script path contains blanks
  # (an unquoted `basename $0` would be split into several arguments).
  local prog=${0##*/}
cat << EOF
NAME

    This script uses text-browser to query and render Wikipedia
    articles. The output will be printed to standard out.

SYNOPSIS
    ${prog} [-BCnNoOpPsSuU] [-b prog] [-c patt] [-i patt] [-l lang]
                   [-X browseroptions] query
    ${prog} -o [-b prog] [-l lang] query
    ${prog} [-h]
    ${prog} -v|-r

    -n  do not colorize         -N  simple colorization (alias -C)
    -p  display using a pager   -P  don't use pager
    -o  open Wikipedia article  -O  don't open in browser
    -s  display only a summary  -S  display whole article
    -u  output the query URL    -U  open URL in browser
    -v  display version         -h  display help
    -t  show available sections

    -r           open Random Page
    -d           debug mode
    -i patt      colorize pattern (case insensitive)
    -I patt      colorize pattern (case-sensitive, alias -c)
    -b prog      use prog as browser (by default to invoke
                 elinks, links2, links, lynx or w3m, if found)
    -l lang      use language (currently supported are: af, als, ca, cs, da,
                 de, en, eo, es, fi, fr, hu, ia, is, it, la, lb, nds, nl, nn,
                 no, pl, pt, rm, ro, simple, sk, sl, sv, tr)
    -T custom    print custom section (anything in html h2 tag)
    -W url       use url as base-URL for wikipedia (e.g. to use a different
                 Wiki). Querying this URL will happen by appending the search
                 term.
    -X "options" pass through options to browser, e.g., "-width 180"
                 (warnings: must be in quotes; browser specific, not checked)

    Query can be any term to search for at Wikipedia. Special
    characters will be taken care of. Note that only one query term is
    supported, however this term can consist of one or more words.

    Configuration can also be controlled by creating a runcontrol file
    .${prog}rc in your home directory.

    Note that when requesting to open the article in a browser, other
    parameters will be ignored. The same holds for the options -h and
    -v.
EOF
}
function getVersion(){ #{{{1
  # Print one line of the form "  <script name> Version: <VERSION>"
  # (two leading blanks) to standard output.
  printf '  %s Version: %s\n' "$(basename "${0}")" "${VERSION}"
}
function errorExit(){ #{{{1
  # Print all arguments, joined by blanks, as one line on standard error
  # and terminate the script with exit status 3.
  #
  # printf is used instead of echo so that a message that happens to look
  # like an echo option (for example "-n") is still printed verbatim.
  printf '%s\n' "$*" >&2
  exit 3
}
function colorize(){ #{{{1
  # Filter: copy standard input to standard output and wrap every match of
  # the regular expression ${PATT} in the ANSI sequence for red text.
  #
  # Globals read:
  #   PATT     basic regular expression to highlight; it is inserted into a
  #            sed "s|...|...|" command, so an unescaped "|" in the pattern
  #            terminates the expression.
  #   IGNCASE  "true" selects case-insensitive matching (sed "i" flag, a GNU
  #            extension), anything else matches case-sensitively.
  #
  # The escape character is placed into the replacement directly. The text
  # itself is never passed through "echo -e", so backslash sequences that
  # occur in the article (e.g. "\n" or "\t" in code samples) stay untouched.
  local esc=$'\033'
  local flags='g'
  if [ "${IGNCASE}" = "true" ]; then
    flags='gi'
  fi
  sed -e "s|\(${PATT}\)|${esc}[0;31m\1${esc}[0m|${flags}"
}
function uri_decode(){ #{{{1
  echo -e "$*" |perl -MURI::Escape -lne 's/ /_/g;s/"//g;print uri_escape($_);'
}
function localize(){ #{{{1
  # Select the language dependent settings for the Wikipedia edition named
  # by ${LOCAL}.
  #
  # Globals read/written:
  #   LOCAL    language code; defaults to "en" when unset or empty and is
  #            normalised to lower case, so "-l DE" selects the same
  #            settings as "-l de".
  # Globals written:
  #   MARKER   regular expression; stripOutput deletes everything from the
  #            first line matching it to the end of the text.
  #   MARKER2  text of the per-section "[edit]" link that stripOutput
  #            removes. It is NOT assigned for unknown languages; any value
  #            set before the call stays in effect in that case.
  #   RANDOMP  title of the "random article" special page.
  LOCAL=$(printf '%s' "${LOCAL:-en}" | tr '[:upper:]' '[:lower:]')
  case "${LOCAL}" in
    de)
      MARKER='^\s*Kategorien\?:'
      MARKER2='Bearbeiten'
      RANDOMP='Spezial:Zufällige Seite'
      ;;
    en | simple)
      MARKER='^\s*Categories:\|^\s*Category:'
      MARKER2='edit'
      RANDOMP='Special:Random'
      ;;
    fr)
      MARKER='^\s*Catégories :'
      MARKER2='modifier'
      RANDOMP='Special:Random'
      ;;
    nl)
      MARKER='^\s*Categorie:'
      MARKER2='bewerk'
      RANDOMP='Speciaal:Willekeurig'
      ;;
    sv)
      MARKER='^\s*Kategorier:'
      MARKER2='redigera'
      RANDOMP='Special:Random'
      ;;
    es)
      MARKER='^\s*Categorías:'
      MARKER2='editar'
      RANDOMP='Especial:Random'
      ;;
    pt)
      MARKER='^\s*Categorias:'
      MARKER2='editar'
      RANDOMP='Especial:Random'
      ;;
    pl)
      MARKER='^\s*Kategorie:'
      MARKER2='Edytuj'
      RANDOMP='Specjalna:Losowa_strona'
      ;;
    it)
      MARKER='^\s*Categorie:'
      MARKER2='modifica'
      RANDOMP='Speciale:PaginaCasuale'
      ;;
    da)
      MARKER='^\s*Kategori:'
      MARKER2='\(redigér\|rediger\)'
      RANDOMP='Speciel:Tilfældig_side'
      ;;
    eo)
      MARKER='^\s*Kategorio:'
      MARKER2='redaktu'
      RANDOMP='Speciala:Random'
      ;;
    no)
      MARKER='^\s*Kategorier:'
      MARKER2='rediger'
      RANDOMP='Spesial:Tilfeldig_side'
      ;;
    nn)
      MARKER='^\s*Kategoriar:'
      MARKER2='endre'
      RANDOMP='Special:Random'
      ;;
    fi)
      MARKER='^\s*Luokat:'
      MARKER2='muokkaa'
      RANDOMP='Toiminnot:Satunnainen_sivu'
      ;;
    ca)
      MARKER='^\s*Categoria:'
      MARKER2='edita'
      RANDOMP='Especial:Random'
      ;;
    ro)
      MARKER='^\s*Categorii:'
      MARKER2='modifica'
      RANDOMP='Special:Random'
      ;;
    cs)
      MARKER='^\s*Kategorie:'
      MARKER2='editovat'
      RANDOMP='Speciální:Random'
      ;;
    sk)
      MARKER='^\s*Kategórie:'
      MARKER2='úprava'
      RANDOMP='Špeciálne:Random'
      ;;
    sl)
      MARKER='^\s*Kategorije:'
      MARKER2='spremeni'
      RANDOMP='Posebno:Random'
      ;;
    lb)
      MARKER='^\s*Kategorie:'
      MARKER2='Änneren'
      RANDOMP='Special:Random'
      ;;
    la)
      MARKER='^\s*Categoriae:'
      MARKER2='recensere'
      RANDOMP='Specialis:Random'
      ;;
    rm)
      MARKER='Views'
      MARKER2='edit'
      RANDOMP='Special:Random'
      ;;
    ia)
      MARKER='Views'
      MARKER2='modificar'
      RANDOMP='Special:Random'
      ;;
    is)
      MARKER='^\s*Flokkar:'
      MARKER2='breyta'
      RANDOMP='Kerfissíða:Random'
      ;;
    hu)
      MARKER='^\s*Kategóriák:'
      MARKER2='szerkesztés'
      RANDOMP='Speciális:Lap_találomra'
      ;;
    tr)
      MARKER='^\s*Sayfa kategorisi:'
      MARKER2='degistir'
      RANDOMP='Özel:Random'
      ;;
    af)
      MARKER='^\s*Kategorieë: '
      MARKER2='wysig'
      RANDOMP='Spesiaal:Random'
      ;;
    nds)
      MARKER='^\s*Kategorien:'
      MARKER2='Ännern'
      RANDOMP='Spezial:Random'
      ;;
    als)
      MARKER='^\s*Kategorie:'
      MARKER2='ändere'
      RANDOMP='Spezial:Zufällige_Seite'
      ;;
    *)
      # Unknown language: generic end-of-article markers and the English
      # name of the random page. MARKER2 is left as it is.
      MARKER='\(Views\|References\|Visible links\)'
      RANDOMP='Special:Random'
      ;;
  esac
}
function stripOutput(){ #{{{1
  # Filter: clean up the plain-text rendering of an article read from
  # standard input and write the result to standard output.
  #
  #   1. remove link/footnote markers such as "[12]" or "^[12]"
  #   2. remove image placeholders "[IMG]"
  #   3. delete everything from the first line matching ${MARKER} up to the
  #      end of the input (that part is only the link dump)
  #   4. if ${MARKER2} is non-empty, remove the "[${MARKER2}]" section links
  #
  # Globals read: MARKER, MARKER2 (regular expressions, normally set by
  # localize).
  #
  # The sed expressions are collected in an array and handed to sed
  # directly. No command string is built and eval'ed, so nothing in the
  # expressions is subject to globbing or a second round of shell parsing.
  local -a sed_args=(
    -e 's|\^\?\[[0-9]*\]||g'
    -e 's|\[IMG\]||g'
    -e "/${MARKER}/,\$ D"
  )
  if [ -n "${MARKER2}" ]; then
    sed_args+=( -e "s#\[${MARKER2}\]##g" )
  fi
  sed "${sed_args[@]}"
}
function openurl(){ #{{{1
  # Start the program named by ${BROWSER} with ${URL} as its only argument.
  # The exit status is that of the browser.
  local -a browser_cmd=( "${BROWSER}" "${URL}" )
  "${browser_cmd[@]}"
}
function summary() { #{{{1
  # Print only the introduction of the article at ${URL}.
  #
  # The HTML is fetched with curl and cut down with a line based heuristic:
  #   - keep each line containing "/table" plus up to 400 lines after it,
  #     then drop the "/table" lines themselves,
  #   - keep at most 400 lines before the first line containing "<div" and
  #     drop that "<div" line (it is the last line, removed by sed).
  # The remaining fragment is rendered by w3m, cleaned by stripOutput and,
  # if ${COLOR} is "true", highlighted by colorize.
  #
  # Globals read: URL, COLOR.
  #
  # The stages are connected as one ordinary pipeline. Compared with
  # building a command string for eval this keeps URLs containing "&", ";"
  # or parentheses intact, and it needs no temporary file with a
  # predictable name in /tmp (w3m reads the HTML from standard input, the
  # content type is given with -T).
  curl -s -L "${URL}" |
    grep -A400 /table |
    grep -v /table |
    grep -m1 -B400 '<div' |
    sed -e '$ d' |
    w3m -dump -T text/html |
    stripOutput |
    if [ "${COLOR}" = "true" ]; then colorize; else cat; fi
}
function print_sections() { #{{{1
  # List the headings of the article at ${URL}.
  #
  # Only lines carrying an <html>, <body>, <h1> or <h2> tag (opening or
  # closing for html/body) are kept; anything in front of an "<h2" on its
  # line is cut off. The remaining skeleton is rendered by w3m and cleaned
  # by stripOutput.
  #
  # Globals read: URL.
  #
  # Written as a plain pipeline instead of an eval'ed command string: the
  # URL is passed to curl as exactly one argument, and no temporary file
  # with a predictable name is created (w3m reads from standard input).
  curl -s -L "${URL}" |
    grep '\(</\?html\)\|\(</\?body\)\|\(<h[12]\)' |
    sed -e 's/^.*<h2/<h2/' |
    w3m -dump -T text/html |
    stripOutput
}
function print_section_detail() { #{{{1
  # Print a single section of the article at ${URL}.
  #
  # Arguments: the section title (all arguments are joined with blanks). It
  #            is used as part of a sed address, i.e. as a basic regular
  #            expression; a "/" in it has to be written as "\/".
  # Globals read: URL, COLOR.
  #
  # From the HTML the html/body/h1 lines are kept, plus the range that
  # starts at the line whose <h2> heading matches the title and ends at the
  # next line containing a closing </h2> tag. That fragment is rendered by
  # w3m, cleaned by stripOutput and optionally highlighted by colorize.
  #
  # The pipeline is run directly instead of through eval, so neither the
  # URL nor the section title is parsed by the shell a second time, and no
  # temporary file in /tmp is needed.
  local section="$*"
  curl -s -L "${URL}" |
    sed -n -e '/\(<\/\?html\)\|\(<\/\?body\)\|\(<h1\)/p' \
           -e "/^.*<h2.*${section}/,/^.*<\/h2.*>/p" |
    sed -e 's/^.*<h2/<h2/' |
    w3m -dump -T text/html |
    stripOutput |
    if [ "${COLOR}" = "true" ]; then colorize; else cat; fi
}
function getInfo(){ #{{{1
  # Dump the whole article at ${URL} as text: run the text browser with
  # "-dump", clean the result with stripOutput and, if ${COLOR} is "true",
  # highlight matches with colorize.
  #
  # Globals read: BROWSER, BROWSEROPTIONS, URL, COLOR.
  #
  # The browser is started directly rather than via eval, so a URL that
  # contains "&", ";", parentheses or other shell syntax reaches the
  # browser as one unchanged argument.
  # ${BROWSEROPTIONS} is left unquoted on purpose: it holds several
  # blank-separated options (e.g. "-width 180") that must become separate
  # arguments. Quote characters inside it are passed on literally.
  if [ "${COLOR}" = "true" ]; then
    "${BROWSER}" ${BROWSEROPTIONS} -dump "${URL}" | stripOutput | colorize
  else
    "${BROWSER}" ${BROWSEROPTIONS} -dump "${URL}" | stripOutput
  fi
}

# First read in the Run configuration File, if one is found #{{{1
# The file is ~/.<script name>rc. It is sourced as shell code, so it can
# preset any of the variables used further down. The script name is taken
# from $0 with proper quoting, so the right file is found even if the path
# of the script contains blanks.
RCFILE=~/".$(basename "${0}")rc"
if [ -r "${RCFILE}" ]; then
  source "${RCFILE}"
  # Whatever BROWSER holds after sourcing becomes the preferred browser;
  # ABROWSER is the same variable the -b option assigns.
  ABROWSER=${BROWSER}
fi

# Process commandline parameters {{{1
# Every option only records its setting in a variable; the actual work is
# done after parsing. The final "-:" in the option string makes getopts
# report a GNU-style "--word" argument as option "-" with OPTARG=word,
# which is how --help is recognised.
while getopts "BCdnNoOpPrsStuUvhb:c:i:I:l:T:W:X:-:" ARGS; do
  case "${ARGS}" in
    b) ABROWSER=${OPTARG} ;;
    d) DEBUG="true" ;;
    B) ABROWSER='' ;;
    c) IGNCASE="false"; COLOR="true"; PATT=${OPTARG} ;;
    C) COLOR="true" ;;
    i) IGNCASE="true"; COLOR="true"; PATT=${OPTARG} ;;
    I) IGNCASE="false"; COLOR="true"; PATT=${OPTARG} ;;
    l) LOCAL=${OPTARG} ;;
    n) COLOR="false" ;;
    N) COLOR="true" ;;
    o) USEBROWSER="true" ;;
    O) USEBROWSER="false" ;;
    p) PAGER="true" ;;
    P) PAGER="false" ;;
    r) RAND="true" ;;
    s) SHORT="true" ;;
    S) SHORT="false" ;;
    t) SECTION="show" ;;
    T) SECTION=${OPTARG} ;;
    u) OUTPUTURL="true" ;;
    U) OPENURL="true" ;;
    v) getVersion; exit 0 ;;
    W) WURL=${OPTARG} ;;
    X) BROWSEROPTIONS=${OPTARG} ;;
    h) display_help; exit 0 ;;
    -)
      # Long options: only --help is known.
      case "${OPTARG}" in
        help) display_help; exit 0 ;;
        *) display_help; exit 1 ;;
      esac
      ;;
    *) display_help; exit 1 ;;
  esac
done

# Drop the parsed options; what remains in "$@" is the query.
shift $((OPTIND - 1))

# Init some variables {{{1
# Set MARKER, MARKER2 and RANDOMP for the selected language.
localize

# Setting Up some Variables, to determine, what actually to do
# Without a query and without -r there is nothing to look up.
if [ -z "$1" ] && [ -z "${RAND}" ]; then
  display_help
  exit 1
fi

# Switches that were not set by an option (or earlier, e.g. in the rc
# file) default to "false".
: "${IGNCASE:=false}"
: "${PAGER:=false}"
: "${OPENURL:=false}"
: "${RAND:=false}"

# -p was given: replace the flag by the path of a real pager, preferring
# less over more. "command -v" is a shell builtin, so this does not depend
# on an external "which" program being installed.
if [ "${PAGER}" = "true" ]; then
  { PAGER=$(command -v less) || PAGER=$(command -v more); } || errorExit "No Pager found!"
fi

# Run less with -Rr (raw output) so that the colour escape sequences
# written by colorize are passed through to the terminal.
PAGER="${PAGER/less/less -Rr}"
: "${COLOR:=false}"

# Colouring without an explicit pattern highlights the query itself.
if [ "${COLOR}" = "true" ] && [ -z "${PATT}" ]; then
  PATT="$*"
fi

# -U: the arguments are taken as the URL.
if [ "${OPENURL}" = "true" ]; then
  URL="$*"
fi

# Check for Alternative Browser
# Order of preference: browser from -b / rc file (ABROWSER), a preset
# BROWSER, then the first text browser found from the list below.
if [ -n "${ABROWSER}" ]; then
  BROWSER=$(command -v "${ABROWSER}") || errorExit "${ABROWSER} not found"
elif [ -n "${BROWSER}" ]; then
  BROWSER=$(command -v "${BROWSER}") || errorExit "${BROWSER} not found"
else
  { BROWSER=$(command -v w3m) ||
    BROWSER=$(command -v elinks) ||
    BROWSER=$(command -v links2) ||
    BROWSER=$(command -v lynx) ||
    BROWSER=$(command -v links.main) ||
    BROWSER=$(command -v links); } || errorExit "No Browser found"
fi

# Open page in Browser?
: "${USEBROWSER:=false}"

# Output only a summary?
: "${SHORT:=false}"

# custom Section
# The value is kept exactly as given (no word splitting or globbing); an
# unset variable becomes the empty string.
SECTION=${SECTION:-}

# Output only the URL?
: "${OUTPUTURL:=false}"

# Now we do some input sanitizing. {{{1
# Encode the query so that it can be appended to the article URL.
ARGUMENT="$(uri_decode "$*")"
# Lower-case the language code before it is used as host name prefix.
LOCAL="$(printf '%s' "${LOCAL}" | tr '[:upper:]' '[:lower:]')"
# Random page?
if [ "${RAND}" = "true" ]; then
  ARGUMENT="$(uri_decode "${RANDOMP}")"
fi
# Build the Wikipedia URL unless a URL has been set already (e.g. by -U).
if [ -z "${URL}" ]; then
  URL="http://${LOCAL}.wikipedia.org/wiki/${ARGUMENT}"
fi

# -W: query a different wiki. The base URL may be given with or without a
# scheme and with or without one trailing slash; "http://" is only
# prepended when no http/https scheme is present.
if [ -n "${WURL}" ]; then
  WURL="${WURL%/}"
  case "${WURL}" in
    http://* | https://*) URL="${WURL}/wiki/${ARGUMENT}" ;;
    *) URL="http://${WURL}/wiki/${ARGUMENT}" ;;
  esac
  # unset $LOCAL to force using an english-locale
  # this is used to strip the tags [edit], eg.
  # NOTE(review): localize is called before this section, so at this point
  # the assignment no longer changes MARKER/MARKER2; in the code visible
  # here it only affects what the debug output reports. Confirm whether
  # -W is really meant to override an explicit -l.
  LOCAL="en"
fi

# Debug mode? {{{1
# With -d, print the effective settings on one line before doing anything.
# The values are passed as printf arguments, never as part of the format
# string: a percent-encoded URL or pattern contains "%" characters that
# printf would otherwise try to interpret as conversions.
if [ "${DEBUG:=false}" = "true" ]; then
  printf 'PAGER: %s Browser: %s Local: %s COLOR: %s PATT: %s IGNCASE: %s URL: %s Summary: %s\n' \
    "${PAGER}" "${BROWSER}" "${LOCAL}" "${COLOR}" "${PATT}" "${IGNCASE}" "${URL}" "${SHORT}"
fi

# Depending on some Variables, we do some different things here {{{1
# The checks are ordered by precedence; the first one that applies does
# its job and ends the script.

# -o: open the article in the browser instead of dumping it.
if [ "${USEBROWSER}" = "true" ]; then
  openurl
  exit 0
fi

# -s: only the summary.
if [ "${SHORT}" = "true" ]; then
  summary
  exit 0
fi

# -t lists the sections, -T <title> prints one of them. The title is
# passed quoted, as one argument, so it is neither split at blanks nor
# expanded as a file name pattern.
if [ "${SECTION}" = "show" ]; then
  print_sections
  exit 0
elif [ -n "${SECTION}" ]; then
  print_section_detail "${SECTION}"
  exit 0
fi

# -u: print the URL only (in blue when colouring is on). Without colour
# the browser command line that would be used is printed as well.
if [ "${OUTPUTURL}" = "true" ]; then
  if [ "${COLOR}" = "false" ]; then
    echo "${URL}"
    echo "${BROWSER}" "${BROWSEROPTIONS}" -dump "${URL}"
  else
    echo -e "\033[0;34m${URL}\033[0m"
  fi
  exit 0
fi

# Default: dump the whole article, through the pager if one is configured.
if [ "${PAGER}" != "false" ]; then
  # PAGER may carry options ("less -Rr") and is therefore left unquoted.
  getInfo | ${PAGER}
else
  getInfo
fi
# Vim Modeline {{{1
# vim: ft=sh et sts=-1 sw=0 ts=2
