#!/bin/sh
# eget - simply shell on wget for loading directories over http (wget does not support wildcard for http)
# Example use:
# eget http://ftp.altlinux.ru/pub/security/ssl/*
#
# Copyright (C) 2014-2014, 2016, 2020  Etersoft
# Copyright (C) 2014 Daniil Mikhailov <danil@etersoft.ru>
# Copyright (C) 2016-2017, 2020 Vitaly Lipatov <lav@etersoft.ru>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

fatal()
{
    echo "FATAL: $*" >&2
    exit 1
}

WGETQ='' #-q
CURLQ='' #-s

set_quiet()
{
    WGETQ='-q'
    CURLQ='-s'
}

# TODO: passthrou all wget options
if [ "$1" = "-q" ] ; then
    set_quiet
    shift
fi


WGET="$(which wget 2>/dev/null)"

if [ -n "$WGET" ] ; then
# put remote content to stdout
scat()
{
    $WGET $WGETQ -O- "$1"
}
# download to default name of to $2
sget()
{
    if [ -n "$2" ] ; then
       $WGET $WGETQ -O "$2" "$1"
    else
       $WGET $WGETQ "$1"
    fi
}

else
CURL="$(which curl 2>/dev/null)"
[ -n "$CURL" ] || fatal "There are no wget nor curl in the system. Install it with $ epm install curl"
# put remote content to stdout
scat()
{
    $CURL -L $CURLQ "$1"
}
# download to default name of to $2
sget()
{
    if [ -n "$2" ] ; then
       $CURL -L $CURLQ --output "$2" "$1"
    else
       $CURL -L $CURLQ -O "$1"
    fi
}
fi

LISTONLY=''
if [ "$1" = "--list" ] ; then
    LISTONLY="$1"
    set_quiet
    shift
fi

LATEST=''
if [ "$1" = "--latest" ] ; then
    LATEST="$1"
    shift
fi

fatal()
{
    echo "$*" >&2
    exit 1
}

# check man glob
filter_glob()
{
	[ -z "$1" ] && cat && return
	# translate glob to regexp
	grep "$(echo "$1" | sed -e "s|\*|.*|g" -e "s|?|.|g")$"
}

filter_order()
{
    [ -z "$LATEST" ] && cat && return
    sort -V | tail -n1
}

# download to this file
TARGETFILE=''
if [ "$1" = "-O" ] ; then
    TARGETFILE="$2"
    shift 2
fi

# TODO:
# -P support

if [ -z "$1" ] ; then
    echo "eget - wget like downloader" >&2
    fatal "Run $0 --help to get help"
fi

if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then
    echo "eget - wget like downloader with wildcard support in filename part of URL"
    echo "Usage: eget [-q] [-O target file] [--list] http://somesite.ru/dir/na*.log"
    echo
    echo "Options:"
    echo "    -q       - quiet mode"
    echo "    -O file  - download to this file (use filename from server if missed)"
    echo "    --list   - print files from url with mask"
    echo "    --latest - print only latest version of file"
    echo
    echo "eget supports --list and download for https://github.com/owner/project urls"
    echo
#    echo "See $ wget --help for wget options you can use here"
    exit
fi

get_github_urls()
{
    # https://github.com/OWNER/PROJECT
    local owner="$(echo "$1" | sed -e "s|^https://github.com/||" -e "s|/.*||")" #"
    local project="$(echo "$1" | sed -e "s|^https://github.com/$owner/||" -e "s|/.*||")" #"
    [ -n "$owner" ] || fatal "Can't get owner from $1"
    [ -n "$project" ] || fatal "Can't get project from $1"
    local URL="https://api.github.com/repos/$owner/$project/releases"
    scat $URL | \
        grep -i -o -E '"browser_download_url": "https://.*"' | cut -d'"' -f4
}

if echo "$1" | grep -q "^https://github.com/" && \
   echo "$1" | grep -q -v "/download/" && [ -n "$2" ] ; then
    MASK="$2"

    if [ -n "$LISTONLY" ] ; then
        get_github_urls "$1" | filter_glob "$MASK" | filter_order
        exit
    fi

    for fn in $(get_github_urls "$1" | filter_glob "$MASK" | filter_order) ; do
        sget "$fn" || ERROR=1
    done
    exit
fi


# do not support /
if echo "$1" | grep -q "/$" ; then
    fatal "Use http://example.com/e/* to download all files in dir"
fi

# If ftp protocol, just download
if echo "$1" | grep -q "^ftp://" ; then
    [ -n "$LISTONLY" ] && fatal "TODO: list files for ftp:// do not supported yet"
    sget "$1" "$TARGETFILE"
    exit
fi

# mask allowed only in the last part of path
MASK=$(basename "$1")

# if mask are second arg
if [ -n "$2" ] ; then
    URL="$1"
    MASK="$2"
else
    # drop mask part
    URL="$(dirname "$1")"
fi

if echo "$URL" | grep -q "[*?]" ; then
    fatal "Error: there are globbing symbols (*?) in $URL"
fi

# If have no wildcard symbol like asterisk, just download
if echo "$MASK" | grep -qv "[*?]" || echo "$MASK" | grep -q "[?].*="; then
    sget "$1" "$TARGETFILE"
    exit
fi

is_url()
{
    echo "$1" | grep -q "://"
}

get_urls()
{
    scat $URL/ | \
         grep -i -o -E 'href="(.+)"' | cut -d'"' -f2
}

if [ -n "$LISTONLY" ] ; then
    for fn in $(get_urls | filter_glob "$MASK" | filter_order) ; do
        is_url "$fn" && echo $fn && continue
        fn="$(echo "$fn" | sed -e 's|^./||' -e 's|^/+||')"
        echo "$URL/$fn"
    done
    exit
fi

ERROR=0
for fn in $(get_urls | filter_glob "$MASK" | filter_order) ; do
    sget "$URL/$(basename "$fn")" || ERROR=1
done
exit $ERROR