#!/bin/sh
# eget - simple shell wrapper over wget for downloading directories over http (wget does not support wildcards for http)
# Use:
# eget http://ftp.altlinux.ru/pub/security/ssl/*
#
# Copyright (C) 2014-2014, 2016, 2020, 2022  Etersoft
# Copyright (C) 2014 Daniil Mikhailov <danil@etersoft.ru>
# Copyright (C) 2016-2017, 2020, 2022 Vitaly Lipatov <lav@etersoft.ru>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

PROGDIR=$(dirname "$0")
PROGNAME=$(basename "$0")
CMDSHELL="/bin/sh"
[ "$PROGDIR" = "." ] && PROGDIR="$(pwd)"
if [ "$0" = "/dev/stdin" ] || [ "$0" = "sh" ] ; then
    PROGDIR=""
    PROGNAME=""
fi


fatal()
{
    echo "FATAL: $*" >&2
    exit 1
}

info()
{
    [ -n "$quiet" ] && return
    echo "$*" >&2
}

eget()
{
	if [ -n "$EPMMODE" ] ; then
		# if embedded in epm
		(unset EGET_IPFS_GATEWAY; unset EGET_IPFS_API ; unset EGET_IPFS_DB ; internal_tools_eget "$@" )
		return
	fi

	[ -n "$PROGNAME" ] || fatal "pipe mode is not supported"

	local bashopt=''
	#[ -n "$verbose" ] && bashopt='-x'

	(unset EGET_IPFS_GATEWAY; unset EGET_IPFS_API ; unset EGET_IPFS_DB ; $CMDSHELL $bashopt $PROGDIR/$PROGNAME "$@" )
}

# TODO:
arch="$(uname -m)"

# copied from eepm project

# copied from /etc/init.d/outformat (ALT Linux)
isatty()
{
	# Set a sane TERM required for tput
	[ -n "$TERM" ] || TERM=dumb
	export TERM
	test -t 1
}

isatty2()
{
	# check stderr
	test -t 2
}


check_tty()
{
	isatty || return
	is_command tput >/dev/null 2>/dev/null || return
	# FreeBSD does not support tput -S
	echo | a= tput -S >/dev/null 2>/dev/null || return
	export USETTY="tput -S"
}

: ${BLACK:=0} ${RED:=1} ${GREEN:=2} ${YELLOW:=3} ${BLUE:=4} ${MAGENTA:=5} ${CYAN:=6} ${WHITE:=7}

set_boldcolor()
{
	[ -n "$USETTY" ] || return
	{
		echo bold
		echo setaf $1
	} | $USETTY
}

set_color()
{
	[ -n "$USETTY" ] || return
	{
		echo setaf $1
	} | $USETTY
}

restore_color()
{
	[ -n "$USETTY" ] || return
	{
		echo op; # set Original color Pair.
		echo sgr0; # turn off all special graphics mode (bold in our case).
	} | $USETTY
}

echover()
{
    [ -n "$verbose" ] || return
    echo "$*" >&2
}

# Print command line and run command line
showcmd()
{
	if [ -z "$quiet" ] ; then
		set_boldcolor $GREEN
		local PROMTSIG="\$"
		[ "$UID" = 0 ] && PROMTSIG="#"
		echo " $PROMTSIG $@"
		restore_color
	fi >&2
}

# Print command line and run command line
docmd()
{
	showcmd "$@"
	"$@"
}

verdocmd()
{
	[ -n "$verbose" ] && showcmd "$@"
	"$@"
}

# copied from epm
# print a path to the command if exists in $PATH
if a= which which 2>/dev/null >/dev/null ; then
    # the best case if we have the which command (other ways need checking)
    # TODO: don't use which at all, it is a binary, not a builtin shell command
print_command_path()
{
    a= which -- "$1" 2>/dev/null
}
elif a= type -a type 2>/dev/null >/dev/null ; then
print_command_path()
{
    a= type -fpP -- "$1" 2>/dev/null
}
else
print_command_path()
{
    a= type "$1" 2>/dev/null | sed -e 's|.* /|/|'
}
fi

# check if <arg> is a real command
is_command()
{
    print_command_path "$1" >/dev/null
}

# add realpath if missed
if ! is_command realpath ; then
realpath()
{
    [ -n "$*" ] || return
    readlink -f "$@"
}
fi

# check man glob
filter_glob()
{
	[ -z "$1" ] && cat && return
	# translate glob to regexp
	grep "$(echo "$1" | sed -e 's|\.|\\.|g' -e 's|\*|.*|g' -e 's|\?|.|g' )$"
}
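# Illustrative (not executed) filter_glob usage, with a hypothetical file list:
#   printf 'pkg-1.2.tar.gz\npkg-1.2.tar.gz.sig\nREADME\n' | filter_glob 'pkg-*.tar.gz'
#   # -> pkg-1.2.tar.gz (the glob is translated to a regexp anchored at the end of the line)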

filter_order()
{
    if [ -n "$SECONDLATEST" ] ; then
        sort -V | tail -n2 | head -n1
        return
    fi
    [ -z "$LATEST" ] && cat && return
    sort -V | tail -n1
}
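# Illustrative (not executed): with LATEST set (as the --latest option does),
# filter_order keeps only the highest version according to sort -V:
#   printf 'app-1.9.tar\napp-1.10.tar\n' | filter_order    # with LATEST set -> app-1.10.tar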


is_fileurl()
{
    echo "$1" | grep -q "^/" && return
    echo "$1" | grep -q "^file:/"
}

path_from_url()
{
    echo "$1" | sed -e 's|^file://*|/|'
}

is_url()
{
    echo "$1" | grep -q "^[filehtps]*:/"
}

is_strange_url()
{
    local URL="$1"
    is_url "$URL" || return
    echo "$URL" | grep -q "[?&]"
}

is_ipfs_hash()
{
    # If a CID is 46 characters starting with "Qm", it's a CIDv0
    echo "$1" | grep -q -E "^Qm[[:alnum:]]{44}$" && return
    # TODO: CIDv1 support, see https://github.com/multiformats/cid
    return 1
}

is_ipfsurl()
{
    is_ipfs_hash "$1" && return
    echo "$1" | grep -q "^ipfs://"
}

is_httpurl()
{
    # TODO: improve
    echo "$1" | grep -q "^https://" && return
    echo "$1" | grep -q "^http://"
}

cid_from_url()
{
    echo "$1" | sed -e 's|^ipfs://*||' -e 's|\?.*||'
}
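# Illustrative (not executed): cid_from_url strips the ipfs:// scheme, e.g.
#   cid_from_url 'ipfs://QmYwf2GAMvHxfFiUFL2Mr6KUG6QrDiupqGc8ms785ktaYw'
#   # -> QmYwf2GAMvHxfFiUFL2Mr6KUG6QrDiupqGc8ms785ktaYw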

# args: cmd <URL> <options>
# will run cmd <options> <URL>
download_with_mirroring()
{
    local CMD="$1"
    shift
    local URL="$1"
    shift

    local res
    $CMD "$@" "$URL" && return
    res=$?
    [ -n "$CHECKMIRRORS" ] || return $res

    MIRROR="https://mirror.eterfund.ru"
    SECONDURL="$(echo "$URL" | sed -e "s|^.*://|$MIRROR/|")"
    $CMD "$@" "$SECONDURL" && URL="$SECONDURL" && return

    MIRROR="https://mirror.eterfund.org"
    SECONDURL="$(echo "$URL" | sed -e "s|^.*://|$MIRROR/|")"
    $CMD "$@" "$SECONDURL" && URL="$SECONDURL" && return
}
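# Illustrative (not executed): with CHECKMIRRORS set, a failing call like
#   download_with_mirroring __wget "https://example.com/dir/file.tar" -O file.tar
# is retried as https://mirror.eterfund.ru/example.com/dir/file.tar and then as
# https://mirror.eterfund.org/example.com/dir/file.tar (hypothetical host and path).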


check_tty

quiet=''
verbose=''
WGETNOSSLCHECK=''
CURLNOSSLCHECK=''
WGETUSERAGENT=''
CURLUSERAGENT=''
WGETCOMPRESSED=''
CURLCOMPRESSED=''
WGETQ='' #-q
CURLQ='' #-s
# TODO: aria2c
# TODO: wget --trust-server-names
# TODO:
WGETNAMEOPTIONS='--content-disposition'
CURLNAMEOPTIONS='--remote-name --remote-time --remote-header-name'

LISTONLY=''
CHECKURL=''
GETRESPONSE=''
GETFILENAME=''
GETREALURL=''
GETIPFSCID=''
LATEST=''
SECONDLATEST=''
CHECKMIRRORS=''
TARGETFILE=''
FORCEIPV=''

set_quiet()
{
    WGETQ='-q'
    CURLQ='-s'
    quiet=1
}


eget_help()
{
cat <<EOF

eget - wget-like downloader wrapper with wildcard support in the filename part of the URL
Usage: eget [options] http://somesite.ru/dir/na*.log

Options:
    -q|--quiet                - quiet mode
    --verbose                 - verbose mode
    -k|--no-check-certificate - do not check the SSL certificate
    -U|-A|--user-agent        - send a browser-like UserAgent
    --compressed              - request a compressed response and automatically decompress the content
    -4|--ipv4|--inet4-only    - use only IPV4
    -6|--ipv6|--inet6-only    - use only IPV6
    -O-|-O -                  - output downloaded file to stdout
    -O file                   - download to this file
    --latest                  - print only the latest version of a file
    --second-latest           - print only the second to latest version of a file
    --allow-mirrors           - check mirrors if the url is not accessible

    --list|--list-only        - print only URLs
    --check URL               - check if the URL is accessible (returns HTTP 200 OK)
    --get-response URL        - get the response with all headers (even if HEAD is not allowed)
    --get-filename URL        - print the filename for the URL (via Content-Disposition if applicable)
    --get-real-url URL        - print the URL after all redirects

Supported URLs:
  ftp:// http:// https:// file:/ ipfs://

Supported backends (set like EGET_BACKEND=curl)
  wget curl (todo: aria2c)

Examples:
  $ eget http://ftp.somesite.ru/package-*.x64.tar
  $ eget http://ftp.somesite.ru/package *.tar
  $ eget https://github.com/owner/project package*.ext
  $ eget -O myname ipfs://QmVRUjnsnxHWkjq91KreCpUk4D9oZEbMwNQ3rzdjwND5dR
  $ eget --list http://ftp.somesite.ru/package-*.tar
  $ eget --check http://ftp.somesite.ru/test
  $ eget --list http://download.somesite.ru 'package-*.tar.xz'
  $ eget --list --latest https://github.com/telegramdesktop/tdesktop/releases 'tsetup.*.tar.xz'

EOF
}


if [ -z "$1" ] ; then
    echo "eget - wget like downloader wrapper with wildcard support, uses wget or curl as backend" >&2
    echo "Run $0 --help to get help" >&2
    exit 1
fi


while [ -n "$1" ] ; do

    case "$1" in
        -h|--help)
            eget_help
            exit
            ;;
        -q|--quiet)
            set_quiet
            ;;
        --verbose)
            verbose="$1"
            ;;
        -k|--no-check-certificate)
            WGETNOSSLCHECK='--no-check-certificate'
            CURLNOSSLCHECK='-k'
            ;;
        -U|-A|--user-agent)
            user_agent="Mozilla/5.0 (X11; Linux $arch) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
            WGETUSERAGENT="-U '$user_agent'"
            CURLUSERAGENT="-A '$user_agent'"
            ;;
        --compressed)
            CURLCOMPRESSED='--compressed'
            WGETCOMPRESSED='--compression=auto'
            ;;
        -4|--ipv4|--inet4-only)
            FORCEIPV="-4"
            ;;
        -6|--ipv6|--inet6-only)
            FORCEIPV="-6"
            ;;
        --list|--list-only)
            LISTONLY="$1"
            set_quiet
            ;;
        --check)
            CHECKURL="$1"
            #set_quiet
            ;;
        --get-filename)
            GETFILENAME="$1"
            ;;
        --get-response)
            GETRESPONSE="$1"
            ;;
        --get-real-url)
            GETREALURL="$1"
            ;;
        --get-ipfs-cid)
            GETIPFSCID="$1"
            ;;
        --latest)
            LATEST="$1"
            ;;
        --second-latest)
            SECONDLATEST="$1"
            ;;
        --check-mirrors)
            CHECKMIRRORS="$1"
            ;;
        -O)
            shift
            TARGETFILE="$1"
            ;;
        -O-)
            TARGETFILE="-"
            ;;
        *)
            break
            ;;
    esac
    shift
done


#############################
# defaults

# https://github.com/ipfs/kubo/issues/5541
ipfs_diag_timeout='--timeout 10s'

ipfs_api_local="/ip4/127.0.0.1/tcp/5001"
[ -n "$EGET_IPFS_API" ] && ipfs_api_local="$EGET_IPFS_API"

ipfs_api_brave="/ip4/127.0.0.1/tcp/45005"

ipfs_gateway="https://cloudflare-ipfs.com/ipfs"
[ -n "$EGET_IPFS_GATEWAY" ] && ipfs_gateway="$EGET_IPFS_GATEWAY"
IPFS_GATEWAY="$ipfs_gateway"

# Test data: https://etersoft.ru/templates/etersoft/images/logo.png
ipfs_checkQm="QmYwf2GAMvHxfFiUFL2Mr6KUG6QrDiupqGc8ms785ktaYw"

get_ipfs_brave()
{
    local ipfs_brave="$(ls ~/.config/BraveSoftware/Brave-Browser/*/*/go-ipfs_* 2>/dev/null)"
    [ -n "$ipfs_brave" ] && [ -x "$ipfs_brave" ] || return
    echo "$ipfs_brave"
}

ipfs_access()
{
    [ -n "$IPFS_CMD" ] || fatal "IPFS is disabled"
    verdocmd $IPFS_CMD --api $IPFS_API $ipfs_diag_timeout diag sys >/dev/null
}

ipfs_check()
{
    [ -n "$IPFS_CMD" ] || fatal "IPFS is disabled"
    verdocmd $IPFS_CMD --api $IPFS_API $ipfs_diag_timeout cat "$1" >/dev/null
}



select_ipfs_mode()
{
    IPFS_CMD="$(get_ipfs_brave)"
    # if no EGET_IPFS_API, check brave
    if [ -z "$EGET_IPFS_API" ] && [ -n "$IPFS_CMD" ] ; then
        IPFS_API="$ipfs_api_brave"
        if ipfs_access ; then
            if ipfs_check "$ipfs_checkQm" ; then
                ipfs_mode="brave" && return
            else
                info "Skipped Brave: it is accessible via $IPFS_CMD --api $IPFS_API, but can't return shared $ipfs_checkQm"
            fi
        fi
    fi

    IPFS_CMD="$(print_command_path ipfs)"
    if [ -n "$IPFS_CMD" ] ; then
        IPFS_API="$ipfs_api_local"
        if ipfs_access ; then
            if ipfs_check "$ipfs_checkQm" ; then
                ipfs_mode="local" && return
            else
                info "Skipped local: it is accessible via $IPFS_CMD --api $IPFS_API, but can't return shared $ipfs_checkQm"
            fi
        fi
    fi

    # TODO: check checksum
    if docmd eget --check "$ipfs_gateway/$ipfs_checkQm" ; then
        ipfs_mode="gateway"
        return
    fi

    IPFS_GATEWAY=''
    if docmd eget --check "$(dirname $ipfs_gateway)" ; then
       info "IPFS gateway $ipfs_gateway is accessible, but can't return shared $ipfs_checkQm"
    else
       info "IPFS gateway $(dirname $ipfs_gateway) is not accessible"
    fi

    ipfs_mode="disabled"
}


# Functions for working with the eget IPFS db
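# Each line of the db stores "URL CID FILENAME" separated by spaces (see
# put_cid_and_url below); an illustrative entry using the test data mentioned above:
#   https://etersoft.ru/templates/etersoft/images/logo.png QmYwf2GAMvHxfFiUFL2Mr6KUG6QrDiupqGc8ms785ktaYw logo.png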
get_cid_by_url()
{
    local URL="$1"
    [ -r "$EGET_IPFS_DB" ] || return
    is_fileurl "$URL" && return 1
    grep -F "$URL Qm" "$EGET_IPFS_DB" | head -n1 | cut -f2 -d" "
}

put_cid_and_url()
{
    local URL="$1"
    local CID="$2"
    local FN="$3"
    [ -w "$EGET_IPFS_DB" ] || return

    is_fileurl "$URL" && return

    echo "$URL $CID $FN" >> "$EGET_IPFS_DB"
    echo "Placed in $EGET_IPFS_DB: $URL $CID $FN"
}

get_filename_by_cid()
{
    local CID="$1"
    [ -z "$EGET_IPFS_DB" ] && basename "$CID" && return
    grep -F " $CID " "$EGET_IPFS_DB" | head -n1 | cut -f3 -d" "
}

get_url_by_cid()
{
    local CID="$1"
    [ -z "$EGET_IPFS_DB" ] && echo "$CID" && return
    grep -F " $CID " "$EGET_IPFS_DB" | head -n1 | cut -f1 -d" "
}

###################


ipfs_mode="$EGET_IPFS"

# enable auto mode when $EGET_IPFS_DB is set
[ -z "$ipfs_mode" ] && [ -n "$EGET_IPFS_DB" ] && ipfs_mode="auto"

if [ -n "$LISTONLY$CHECKURL" ] ; then
    ipfs_mode="disabled"
    EGET_IPFS_DB=''
fi


if [ "$ipfs_mode" != "disabled" ] && [ -n "$EGET_IPFS_DB" ] ; then
    ddb="$(dirname "$EGET_IPFS_DB")"
    if [ -d "$ddb" ] ; then
        info "Using eget IPFS db $EGET_IPFS_DB"
        [ -r "$EGET_IPFS_DB" ] || touch "$EGET_IPFS_DB"
    else
        EGET_IPFS_DB=''
    fi
fi


# detect if we run with ipfs:// or with auto
if is_ipfsurl "$1" && [ -z "$ipfs_mode" ] || [ "$ipfs_mode" = "auto" ] ; then
    info "Autodetecting an available IPFS relay..."
    select_ipfs_mode
    info "Auto selected IPFS mode: $ipfs_mode"
else
    [ -n "$ipfs_mode" ] && [ "$ipfs_mode" != "disabled" ] && info "IPFS mode: $ipfs_mode"
fi

IPFS_CMD=''

if [ "$ipfs_mode" = "disabled" ] ; then

ipfs_get()
{
    fatal "IPFS is disabled"
}

ipfs_put()
{
    fatal "IPFS is disabled"
}

ipfs_cat()
{
    fatal "IPFS is disabled"
}


elif [ "$ipfs_mode" = "brave" ] ; then
    IPFS_CMD="$(get_ipfs_brave)" || fatal "Can't find ipfs command in Brave"
    IPFS_PRETTY_CMD="~Brave-Browser/$(basename $IPFS_CMD)"
    IPFS_API="$ipfs_api_brave"
    ipfs_access || fatal "Can't access to Brave IPFS API (Brave browser is not running and IPFS is not activated?)"
    info "Will use $IPFS_PRETTY_CMD --api $IPFS_API"

elif [ "$ipfs_mode" = "local" ] ; then
    IPFS_CMD="$(print_command_path ipfs)" || fatal "Can't find ipfs command"
    IPFS_PRETTY_CMD="$IPFS_CMD"
    IPFS_API="$ipfs_api_local"
    ipfs_access || fatal "Can't access to IPFS API (ipfs daemon is not running?)"
    info "Will use $IPFS_PRETTY_CMD --api $IPFS_API"

elif [ "$ipfs_mode" = "gateway" ] ; then
    info "Will use eget $IPFS_GATEWAY/HASH"

ipfs_get_real_url()
{
    [ -n "$IPFS_GATEWAY" ] || fatal "ipfs http gateway is not set"
    echo "$IPFS_GATEWAY/$1"
}

ipfs_get()
{
    if [ -n "$2" ] ; then
        docmd eget -O "$2" "$(ipfs_get_real_url "$1")"
    else
        docmd eget "$(ipfs_get_real_url "$1")"
    fi
}

ipfs_cat()
{
    # FIXME:
    ipfs_get "$1" "-"
}

ipfs_put()
{
    info "IPFS put skipped when a gateway is used"
    return 1
}
elif [ -z "$ipfs_mode" ] ; then
    :
else
    fatal "Unsupported eget ipfs mode $ipfs_mode"
fi

if [ -n "$IPFS_CMD" ] ; then

ipfs_get_real_url()
{
    return 1
}

ipfs_get()
{
    [ -n "$IPFS_CMD" ] || fatal "ipfs api is not usable"
    if [ -n "$2" ] ; then
        showcmd $IPFS_PRETTY_CMD --api $IPFS_API get -o "$2" "$1"
        $IPFS_CMD --api $IPFS_API get -o "$2" "$1"
    else
        showcmd $IPFS_PRETTY_CMD --api $IPFS_API get "$1"
        $IPFS_CMD --api $IPFS_API get "$1"
    fi
}

ipfs_put()
{
    [ -n "$IPFS_CMD" ] || fatal "ipfs api is not usable"

    # detect if -q is used (will output Qm instead of added Qm)
    local qu="$1"
    [ "$qu" = "-q" ] || qu=''

    showcmd $IPFS_PRETTY_CMD --api $IPFS_API add "$@"

    local res
    res="$($IPFS_CMD --api $IPFS_API add "$@")" || return

    if [ -z "$qu" ] ; then
        res="$(echo "$res" | grep "^added Qm")" || return
        res="$(echo "$res" | cut -f2 -d" ")"
    fi

    is_ipfs_hash "$res" && echo "$res" && return
    fatal "Can't recognize $res IPFS hash"
}

ipfs_cat()
{
    [ -n "$IPFS_CMD" ] || fatal "ipfs api is not usable"
    showcmd $IPFS_PRETTY_CMD --api $IPFS_API cat "$1"
    $IPFS_CMD --api $IPFS_API cat "$1"
}

fi
###############################



WGET="$(print_command_path wget)"
CURL="$(print_command_path curl)"

if is_fileurl "$1" ; then
    EGET_BACKEND="file"
elif is_ipfsurl "$1" ; then
    EGET_BACKEND="ipfs"
fi


case "$EGET_BACKEND" in
    file|ipfs)
        ;;
    wget)
        [ -n "$WGET" ] || fatal "There are no wget in the system but you forced using it via EGET_BACKEND. Install it with $ epm install wget"
        ;;
    curl)
        [ -n "$CURL" ] || fatal "There are no curl in the system but you forced using it via EGET_BACKEND. Install it with $ epm install curl"
        ;;
    '')
        [ -n "$WGET" ] && EGET_BACKEND="wget"
        [ -z "$EGET_BACKEND" ] && [ -n "$CURL" ] && EGET_BACKEND="curl"
        [ -n "$EGET_BACKEND" ] || fatal "There are no wget nor curl in the system. Install something with $ epm install wget"
        ;;
    *)
        fatal "Uknown EGET_BACKEND $EGET_BACKEND"
        ;;
esac



if [ "$EGET_BACKEND" = "file" ] ; then

# put remote content to stdout
url_scat()
{
    local URL="$1"
    cat "$(path_from_url "$URL")"
}
# download to the default name or to $2
url_sget()
{
    local URL="$1"
    if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
       scat "$URL"
       return
    elif [ -n "$2" ] ; then
       cp -av "$(path_from_url "$URL")" "$2"
       return
    fi
    cp -av "$(path_from_url "$URL")" .
}

url_check()
{
    local URL="$1"
    test -f "$(path_from_url "$URL")"
}

url_get_filename()
{
    basename "$1"
}

url_get_real_url()
{
    echo "$1"
}

elif [ "$EGET_BACKEND" = "ipfs" ] ; then

# put remote content to stdout
url_scat()
{
    local URL="$1"
    ipfs_cat "$(cid_from_url "$URL")"
}
# download to the default name or to $2
url_sget()
{
    local URL="$1"
    if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
       scat "$URL"
       return
    elif [ -n "$2" ] ; then
       ipfs_get "$(cid_from_url "$URL")" "$2"
       return
    fi

    local fn="$(url_print_filename_from_url "$URL")"
    if [ -n "$fn" ] ; then
       ipfs_get "$(cid_from_url "$URL")" "$fn"
       return
    fi

    ipfs_get "$(cid_from_url "$URL")"
}

url_check()
{
    local URL="$1"
    # TODO: improve me
    scat "$URL" >/dev/null
}

url_print_filename_from_url()
{
    local URL="$1"
    local fn="$(echo "$URL" | sed -e 's|ipfs://.*\?filename=||')"
    [ "$URL" != "$fn" ] && echo "$fn" && return
}

url_get_filename()
{
    local URL="$1"
    url_print_filename_from_url "$URL" && return
    local CID="$(cid_from_url "$URL")"
    get_filename_by_cid "$CID"
}

url_get_real_url()
{
    local URL="$1"
    local CID="$(cid_from_url "$URL")"
    # if we use gateway, return URL with gateway
    ipfs_get_real_url "$URL" && return
    get_url_by_cid "$CID"
}


elif [ "$EGET_BACKEND" = "wget" ] ; then
__wget()
{
    if [ -n "$WGETUSERAGENT" ] ; then
        docmd $WGET $FORCEIPV $WGETQ $WGETCOMPRESSED $WGETNOSSLCHECK "$WGETUSERAGENT" "$@"
    else
        docmd $WGET $FORCEIPV $WGETQ $WGETCOMPRESSED $WGETNOSSLCHECK "$@"
    fi
}

# put remote content to stdout
url_scat()
{
    local URL="$1"
    download_with_mirroring __wget "$URL" -O-
}
# download to the default name or to $2
url_sget()
{
    local URL="$1"
    if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
       scat "$URL"
       return
    elif [ -n "$2" ] ; then
       download_with_mirroring __wget "$URL" -O "$2"
       return
    fi
# TODO: rsync support for known hosts?
# Do not download if the size and date are the same
# -nc
# TODO: overwrite always
    download_with_mirroring __wget "$URL" $WGETNAMEOPTIONS
}

url_get_response()
{
    local URL="$1"
    local answer
    answer="$(quiet=1 __wget --spider -S "$URL" 2>&1)"
    # HTTP/1.1 405 Method Not Allowed
    if echo "$answer" | grep -q "^ *HTTP/[12.]* 405" ; then
        (quiet=1 __wget --start-pos=5000G -S "$URL" 2>&1)
        return
    fi
    echo "$answer"
}
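# Note on the 405 fallback above: some servers refuse HEAD requests, so the
# headers are retrieved with a ranged GET instead; the huge --start-pos value
# is apparently meant to avoid transferring the file body while -S still
# prints the response headers.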


elif [ "$EGET_BACKEND" = "curl" ] ; then

__curl()
{
    if [ -n "$CURLUSERAGENT" ] ; then
        docmd $CURL $FORCEIPV --fail -L $CURLQ $CURLCOMPRESSED "$CURLUSERAGENT" $CURLNOSSLCHECK "$@"
    else
        docmd $CURL $FORCEIPV --fail -L $CURLQ $CURLCOMPRESSED $CURLNOSSLCHECK "$@"
    fi
}
# put remote content to stdout
url_scat()
{
    local URL="$1"
    download_with_mirroring __curl "$URL" --output -
}
# download to the default name or to $2
url_sget()
{
    local URL="$1"
    local res
    if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
       scat "$1"
       return
    elif [ -n "$2" ] ; then
       download_with_mirroring __curl "$URL" --output "$2"
       return
    fi

    download_with_mirroring __curl "$URL" $CURLNAMEOPTIONS
}

url_get_response()
{
    local URL="$1"
    local answer
    answer="$(quiet=1 __curl -LI "$URL" 2>&1)"
    # HTTP/1.1 405 Method Not Allowed
    if echo "$answer" | grep -q "^ *HTTP/[12.]* 405" ; then
        (quiet=1 __curl -L -i -r0-0 "$URL" 2>&1)
        return
    fi
    echo "$answer"
}


fi


# Common code for both wget and curl (http related)
if [ "$EGET_BACKEND" = "wget" ] || [ "$EGET_BACKEND" = "curl" ] ; then

url_get_headers()
{
    local URL="$1"
    url_get_response "$URL" | grep -i "^ *[[:alpha:]].*: " | sed -e 's|^ *||' -e 's|\r$||'
}

url_check()
{
    local URL="$1"
    url_get_response "$URL" | grep "HTTP/" | tail -n1 | grep -q -w "200\|404"
}

url_get_header()
{
    local URL="$1"
    local HEADER="$2"
    url_get_headers "$URL" | grep -i "^ *$HEADER: " | sed -e "s|^ *$HEADER: ||i"
}
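# Illustrative (not executed) url_get_header usage, with a hypothetical URL and value:
#   url_get_header "https://example.com/file.tar" "Content-Length"    # -> 12345678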

url_get_real_url()
{
    local URL="$1"

    ! is_httpurl "$URL" && echo "$URL" && return

    # don't check the Location if we already made the final form of the URL ourselves
    [ -n "$MADEURL" ] && [ "$MADEURL" = "$URL" ] && echo "$URL" && return

    local loc
    for loc in $(url_get_header "$URL" "Location" | tac | sed -e 's| .*||') ; do
        if ! is_strange_url "$loc" ; then
            echo "$loc"
            return
        fi
    done

    echo "$URL"
}

url_get_filename()
{
    local URL="$1"

    ! is_httpurl "$URL" && basename "$URL" && return

    # FIXME with wget
    local cd="$(url_get_header "$URL" "Content-Disposition")"
    if echo "$cd" | grep -q "filename=" ; then
        #Content-Disposition: attachment; filename=postman-linux-x64.tar.gz
        #content-disposition: attachment; filename="code-1.77.1-1680651749.el7.x86_64.rpm"
        echo "$cd" | sed -e 's|.*filename=||' -e 's|^"||' -e 's|";$||' -e 's|"$||'
        return
    fi

    basename "$(url_get_real_url "$URL")"
}
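# Illustrative (not executed): for a response header like
#   Content-Disposition: attachment; filename="pkg-1.0.x86_64.rpm"
# url_get_filename prints pkg-1.0.x86_64.rpm; without such a header it falls back
# to the basename of the redirect-resolved URL (hypothetical header and name).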

fi


if [ "$ipfs_mode" != "disabled" ] && [ -n "$EGET_IPFS_DB" ] &&  ! is_ipfsurl "$1"  ; then

download_to_ipfs()
{
    local URL="$1"
    local res
    #res="$(url_scat "$URL" | ipfs_put )" || return
    #res="$(echo "$res" | grep "^added Qm")" || return 1
    #CID="$(echo "$res" | cut -f2 -d" ")"
    # with -q to disable progress (mixed with download progress)
    res="$(url_scat "$URL" | ipfs_put -q)" || return
    is_ipfs_hash "$res" || return 1
    echo "$res"
}

# put remote content to stdout
scat()
{
    local URL="$1"
    url_scat "$URL"

    # It is a list-only function. Don't save to IPFS
    return

    ###################

    local CID="$(get_cid_by_url "$URL")"
    if [ -n "$CID" ] ; then
        info "$URL -> $CID"
        ipfs_cat "$CID"
        return
    fi

    CID="$(download_to_ipfs "$URL")" || return

    ipfs_cat "$CID" || return

    local FN="$(url_get_filename "$URL")" || return

    put_cid_and_url "$URL" "$CID" "$FN"
}

# download to default name of to $2
sget()
{
    local URL="$1"
    local TARGET="$2"

    if [ -n "$GETFILENAME" ] ; then
        get_filename "$URL"
        exit
    fi

    local REALURL="$(get_real_url "$URL")" || return

    if [ -n "$GETREALURL" ] ; then
        echo "$REALURL"
        exit
    fi

    # skip ipfs for cat
    if [ "$TARGET" = "/dev/stdout" ] || [ "$TARGET" = "-" ] ; then
       url_scat "$URL"
       return
    fi


    #if is_strange_url "$REALURL" ; then
    #    info "Just download strange URL $REALURL, skipping IPFS"
    #    url_sget "$REALURL" "$TARGET"
    #    return
    #fi

    local CID="$(get_cid_by_url "$REALURL")"
    if [ -n "$CID" ] ; then

        if [ -n "$GETIPFSCID" ] ; then
            echo "$CID"
            exit
        fi

        if [ -n "$GETFILENAME" ] ; then
            get_filename_by_cid "$CID"
            exit
        fi

        if [ -n "$GETREALURL" ] ; then
            get_url_by_cid "$CID"
            exit
        fi

        if [ -z "$TARGET" ] ; then
            # TODO: in some cases we can get name from URL...
            TARGET="$(get_filename_by_cid "$CID")"
            if [ -z "$TARGET" ] ; then
                TARGET="$CID"
            fi
        fi
        [ "$URL" = "$REALURL" ] && info "$URL -> $CID -> $TARGET" || info "$URL -> $REALURL -> $CID -> $TARGET"
        ipfs_get "$CID" "$TARGET" && return

        # fail get from IPFS, fallback
        url_sget "$REALURL" "$TARGET"
        return
    fi


    # download and put to IPFS
    local FN="$(url_get_filename "$REALURL")" || return
    if [ -z "$TARGET" ] ; then
        TARGET="$FN"
    fi

    if [ -n "$GETIPFSCID" ] ; then
         # add to IPFS and print out CID
         CID="$(ipfs_put --progress "$REALURL")" || return
         echo "$CID"
         exit
    fi

    # download file and add to IPFS
    url_sget "$REALURL" "$TARGET" || return

    # don't do ipfs put when a gateway is used
    [ "$ipfs_mode" = "gateway" ] && return

    CID="$(ipfs_put --progress "$TARGET")" || return

    put_cid_and_url "$REALURL" "$CID" "$FN"
}

check_url_is_accessible()
{
    local URL="$1"
    local REALURL="$(get_real_url "$URL")" || return
    local CID="$(get_cid_by_url "$REALURL")"
    if [ -n "$CID" ] ; then
        [ "$URL" = "$REALURL" ] && info "$URL -> $CID" || info "$URL -> $REALURL -> $CID"
        ipfs_check "$CID"
        return
    fi

    CID="$(download_to_ipfs "$REALURL")" || return

    local FN="$(url_get_filename "$REALURL")" || return
    ipfs_cat "$CID" >/dev/null || return
    put_cid_and_url "$REALURL" "$CID" "$FN"
}

get_filename()
{
    url_get_filename "$1"
}

get_real_url()
{
    url_get_real_url "$1"
}

else
scat()
{
    url_scat "$@"
}

sget()
{
    if [ -n "$GETFILENAME" ] ; then
        get_filename "$1"
        exit
    fi

    if [ -n "$GETREALURL" ] ; then
        get_real_url "$1"
        exit
    fi

    url_sget "$@"
}

check_url_is_accessible()
{
    url_check "$@"
}

get_filename()
{
    url_get_filename "$1"
}

get_real_url()
{
    url_get_real_url "$1"
}

fi


get_github_urls()
{
    # https://github.com/OWNER/PROJECT
    local owner="$(echo "$1" | sed -e "s|^https://github.com/||" -e "s|/.*||")" #"
    local project="$(echo "$1" | sed -e "s|^https://github.com/$owner/||" -e "s|/.*||")" #"
    [ -n "$owner" ] || fatal "Can't get owner from $1"
    [ -n "$project" ] || fatal "Can't get project from $1"
    local URL="https://api.github.com/repos/$owner/$project/releases"
    scat $URL | \
        grep -i -o -E '"browser_download_url": "https://.*"' | cut -d'"' -f4
}
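# Illustrative (not executed): list release assets of a hypothetical project and
# filter them by a mask, as the github branch below does:
#   get_github_urls "https://github.com/owner/project" | filter_glob 'project-*.tar.gz'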

# drop file path from URL
get_host_only()
{
    echo "$1/" | grep -Eo '(.*://[^/]+)'
}

concatenate_url_and_filename()
{
    local url="$1"
    local fn="$2"
    # workaround for a trailing slash in the URL
    echo "$(echo "$url" | sed -e 's|/*$||' )/$fn"
}

# MADEURL is filled with the latest constructed URL as a flag that it is the final form of the URL
MADEURL=''

# Args: URL filename
make_fileurl()
{
    local url="$1"
    local fn="$2"

    fn="$(echo "$fn" | sed -e 's|^./||' -e 's|^/+||')"

    if is_fileurl "$url" ; then
        # if it is a file url, leave it as is
        :
    elif echo "$fn" | grep -q "^/" ; then
        # if the file path is from the root of the site
        url="$(get_host_only "$url")"
    elif echo "$url" | grep -q -v "/$" ; then
        # if there is no slash at the end of the URL
        url="$(dirname "$url")"
    fi

    MADEURL="$(concatenate_url_and_filename "$url" "$fn")"
    echo "$MADEURL"
}
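# Illustrative (not executed) make_fileurl results, with hypothetical values:
#   make_fileurl "http://example.com/dir/" "pkg-1.0.tar.gz"           # -> http://example.com/dir/pkg-1.0.tar.gz
#   make_fileurl "http://example.com/dir/index.html" "pkg-1.0.tar.gz" # -> http://example.com/dir/pkg-1.0.tar.gz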

get_urls()
{
    if is_fileurl "$URL" ; then
        ls -1 "$(path_from_url "$URL")"
        return
    fi

    # cat the html, split it into lines at tags, and extract hrefs only
    scat $URL | sed -e 's|<|<\n|g' -e 's|data-file=|href=|g' -e "s|'|\"|g" | \
         grep -i -o -E 'href="(.+)"' | cut -d'"' -f2
}


if [ -n "$CHECKURL" ] ; then
    #set_quiet
    URL="$1"
    check_url_is_accessible "$URL"
    res=$?
    if [ -n "$verbose" ] ; then
        [ "$res" = "0" ] && echo "$URL is accessible via network" || echo "$URL is NOT accessible via network"
    fi
    exit $res
fi

if [ -n "$GETRESPONSE" ] ; then
    url_get_response "$1"
    exit
fi


# separate part for github downloads
if echo "$1" | grep -q "^https://github.com/" && \
   echo "$1" | grep -q -v "/download/" && [ -n "$2" ] ; then
    MASK="$2"

    if [ -n "$LISTONLY" ] ; then
        get_github_urls "$1" | filter_glob "$MASK" | filter_order
        exit
    fi

    ERROR=0
    for fn in $(get_github_urls "$1" | filter_glob "$MASK" | filter_order) ; do
        MADEURL="$fn" # mark it is the end form of the URL
        sget "$fn" "$TARGETFILE" || ERROR=1
        [ -n "$TARGETFILE" ] && [ "$ERROR" = "0" ] && break
    done
    exit
fi

if is_ipfsurl "$1" ; then
    [ -n "$2" ] && fatal "too many args when ipfs://Qm... used: extra '$2' arg"
    sget "$1" "$TARGETFILE"
    exit
fi

# if mask is the second arg
if [ -n "$2" ] ; then
    URL="$1"
    MASK="$2"
else
    # do not support / at the end without separately specified mask
    if echo "$1" | grep -q "/$" ; then
        fatal "Use http://example.com/e/* to download all files in dir"
    fi

    # drop mask part
    URL="$(dirname "$1")/"
    # wildcards allowed only in the last part of path
    MASK=$(basename "$1")
fi

# https://www.freeoffice.com/download.php?filename=freeoffice-2021-1062.x86_64.rpm
if echo "$URL" | grep -q "[*]" ; then
    fatal "Error: there are globbing symbol (*) in $URL. It is allowed only for mask part"
fi

is_url "$MASK" && fatal "eget supports only one URL as argument"
[ -n "$3" ] && fatal "too many args: extra '$3'. May be you need use quotes for arg with wildcards."

# TODO: curl?
# If ftp protocol, just download
if echo "$URL" | grep -q "^ftp://" ; then
    [ -n "$LISTONLY" ] && fatal "TODO: list files for ftp:// is not supported yet"
    sget "$1" "$TARGETFILE"
    exit
fi


if [ -n "$LISTONLY" ] ; then
    for fn in $(get_urls | filter_glob "$MASK" | filter_order) ; do
        is_url "$fn" && echo "$fn" && continue
        make_fileurl "$URL" "$fn"
    done
    exit
fi

# If there is no wildcard symbol like asterisk, just download
if echo "$MASK" | grep -qv "[*?]" || echo "$MASK" | grep -q "[?].*="; then
    sget "$1" "$TARGETFILE"
    exit
fi

ERROR=0
for fn in $(get_urls | filter_glob "$MASK" | filter_order) ; do
    is_url "$fn" || fn="$(make_fileurl "$URL" "$fn" )" #"
    sget "$fn" "$TARGETFILE" || ERROR=1
    [ -n "$TARGETFILE" ] && [ "$ERROR" = "0" ] && break
done
exit $ERROR