#!/bin/sh

# eget - simple shell wrapper over wget for downloading directories over http (wget does not support wildcards for http)
# Usage:
# eget http://ftp.altlinux.ru/pub/security/ssl/*
#
# Copyright (C) 2014-2014, 2016, 2020, 2022  Etersoft
# Copyright (C) 2014 Daniil Mikhailov <danil@etersoft.ru>
# Copyright (C) 2016-2017, 2020, 2022 Vitaly Lipatov <lav@etersoft.ru>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

init_eget()
{
PROGDIR=$(dirname "$0")
PROGNAME=$(basename "$0")
CMDSHELL="/bin/sh"
[ "$PROGDIR" = "." ] && PROGDIR="$(pwd)"
if [ "$0" = "/dev/stdin" ] || [ "$0" = "sh" ] ; then
    PROGDIR=""
    PROGNAME=""
fi
}
init_eget


fatal()
{
    echo "FATAL: $*" >&2
    exit 1
}

info()
{
    [ -n "$quiet" ] && return
    echo "$*" >&2
}

eget()
{
	if [ -n "$EPMMODE" ] ; then
		# if embedded in epm
		(unset EGET_IPFS_GATEWAY; unset EGET_IPFS_API ; unset EGET_IPFS_DB ; EGET_BACKEND=$ORIG_EGET_BACKEND internal_tools_eget "$@" )
		return
	fi

	[ -n "$PROGNAME" ] || fatal "pipe mode is not supported"

	local bashopt=''
	#[ -n "$verbose" ] && bashopt='-x'

	(unset EGET_IPFS_GATEWAY; unset EGET_IPFS_API ; unset EGET_IPFS_DB ; EGET_BACKEND=$ORIG_EGET_BACKEND $CMDSHELL $bashopt $PROGDIR/$PROGNAME "$@" )
}

# TODO:
arch="$(uname -m)"

# copied from eepm project

# copied from /etc/init.d/outformat (ALT Linux)
isatty()
{
	# Set a sane TERM required for tput
	[ -n "$TERM" ] || TERM=dumb
	export TERM
	test -t 1
}

isatty2()
{
	# check stderr
	test -t 2
}


check_tty()
{
	isatty || return
	is_command tput >/dev/null 2>/dev/null || return
	# FreeBSD does not support tput -S
	echo | a= tput -S >/dev/null 2>/dev/null || return
	export USETTY="tput -S"
}

: ${BLACK:=0} ${RED:=1} ${GREEN:=2} ${YELLOW:=3} ${BLUE:=4} ${MAGENTA:=5} ${CYAN:=6} ${WHITE:=7}

set_boldcolor()
{
	[ -n "$USETTY" ] || return
	{
		echo bold
		echo setaf $1
	} | $USETTY
}

set_color()
{
	[ -n "$USETTY" ] || return
	{
		echo setaf $1
	} | $USETTY
}

restore_color()
{
	[ -n "$USETTY" ] || return
	{
		echo op; # set Original color Pair.
		echo sgr0; # turn off all special graphics mode (bold in our case).
	} | $USETTY
}


echover()
{
    [ -n "$verbose" ] || return
    echo "$*" >&2
}

# Print command line
showcmd()
{
	if [ -z "$quiet" ] ; then
		set_boldcolor $GREEN
		local PROMPTSIG="\$"
		[ "$UID" = 0 ] && PROMPTSIG="#"
		echo " $PROMPTSIG $@"
		restore_color
	fi >&2
}

# Print command line and run command line
docmd()
{
	showcmd "$@"
	"$@"
}

verdocmd()
{
	[ -n "$verbose" ] && showcmd "$@"
	"$@"
}


# copied from epm
# print a path to the command if it exists in $PATH
if a= which which 2>/dev/null >/dev/null ; then
    # the best case if we have which command (other ways need checking)
    # TODO: don't use which at all, it is binary, not builtin shell command
print_command_path()
{
    a= which -- "$1" 2>/dev/null
}
elif a= type -a type 2>/dev/null >/dev/null ; then
print_command_path()
{
    a= type -fpP -- "$1" 2>/dev/null
}
else
print_command_path()
{
    a= type "$1" 2>/dev/null | sed -e 's|.* /|/|'
}
fi

# check if <arg> is a real command
is_command()
{
    print_command_path "$1" >/dev/null
}

# add realpath if missing
if ! is_command realpath ; then
realpath()
{
    [ -n "$*" ] || return
    readlink -f "$@"
}
fi


# see glob(7)
filter_glob()
{
	[ -z "$1" ] && cat && return
	# translate glob to regexp
	grep "$(echo "$1" | sed -e 's|\.|\\.|g' -e 's|\*|.*|g' -e 's|\?|.|g' )$"
}

filter_order()
{
    if [ -n "$SECONDLATEST" ] ; then
        sort -V | tail -n2 | head -n1
        return
    fi
    [ -z "$LATEST" ] && cat && return
    sort -V | tail -n1
}
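
# Illustrative example (hypothetical file names): filter_glob translates the
# glob "pkg-*.tar" into the regexp "pkg-.*\.tar$", so
#   printf 'pkg-1.0.tar\npkg-1.2.tar\nother.txt\n' | filter_glob 'pkg-*.tar'
# prints the two pkg-*.tar names; with LATEST set, filter_order then keeps
# only the highest version (pkg-1.2.tar) via sort -V.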

have_end_slash_or_php_parametr()
{
    echo "$1" | grep -qE '(/$|\.php($|\?))'
}

is_abs_path()
{
    echo "$1" | grep -q '^/'
}

is_fileurl()
{
    is_abs_path "$1" && return
    echo "$1" | grep -q "^file:/"
}

path_from_url()
{
    echo "$1" | sed -e 's|^file://*|/|'
}

is_url()
{
    echo "$1" | grep -q "^[filehtps]*:/"
}

is_strange_url()
{
    local URL="$1"
    is_url "$URL" || return
    echo "$URL" | grep -q "[?&]"
}

is_ipfs_hash()
{
    # If a CID is 46 characters starting with "Qm", it's a CIDv0
    echo "$1" | grep -q -E "^Qm[[:alnum:]]{44}$" && return
    # TODO: CIDv1 support, see https://github.com/multiformats/cid
    return 1
}

is_ipfsurl()
{
    is_ipfs_hash "$1" && return
    echo "$1" | grep -q "^ipfs://"
}
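
# Both forms are accepted, e.g. with the test CID used below:
#   is_ipfsurl "QmYwf2GAMvHxfFiUFL2Mr6KUG6QrDiupqGc8ms785ktaYw"        # bare CIDv0
#   is_ipfsurl "ipfs://QmYwf2GAMvHxfFiUFL2Mr6KUG6QrDiupqGc8ms785ktaYw"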

is_httpurl()
{
    # TODO: improve
    echo "$1" | grep -q "^https://" && return
    echo "$1" | grep -q "^http://" && return
}

cid_from_url()
{
    echo "$1" | sed -e 's|^ipfs://*||' -e 's|\?.*||'
}
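
# e.g. (hypothetical URL): cid_from_url "ipfs://QmYwf2...abc?filename=logo.png"
# prints the bare CID QmYwf2...abc (the ipfs:// prefix and the ?... part are stripped)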

# args: cmd <URL> <options>
# will run cmd <options> <URL>
download_with_mirroring()
{
    local CMD="$1"
    shift
    local URL="$1"
    shift

    local res
    $CMD "$@" "$URL" && return
    res=$?
    [ -n "$CHECKMIRRORS" ] || return $res

    MIRROR="https://mirror.eterfund.ru"
    SECONDURL="$(echo "$URL" | sed -e "s|^.*://|$MIRROR/|")"
    $CMD "$@" "$SECONDURL" && URL="$SECONDURL" && return

    MIRROR="https://mirror.eterfund.org"
    SECONDURL="$(echo "$URL" | sed -e "s|^.*://|$MIRROR/|")"
    $CMD "$@" "$SECONDURL" && URL="$SECONDURL" && return
}
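
# e.g. download_with_mirroring __wget "$URL" -O- runs "__wget -O- $URL" and,
# when CHECKMIRRORS is set, retries the same path via the eterfund mirrors.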


check_tty

quiet=''
verbose=''
WGETNOSSLCHECK=''
CURLNOSSLCHECK=''
AXELNOSSLCHECK=''
WGETUSERAGENT=''
CURLUSERAGENT=''
AXELUSERAGENT=''
WGETHEADER=''
CURLHEADER=''
AXELHEADER=''
WGETCOMPRESSED=''
CURLCOMPRESSED=''
AXELCOMPRESSED=''
WGETQ='' #-q
CURLQ='' #-s
AXELQ='' #-q
# TODO: aria2c
WGETNAMEOPTIONS='--content-disposition'
CURLNAMEOPTIONS='--remote-name --remote-time --remote-header-name'
AXELNAMEOPTIONS=''
WGETRUSTSERVERNAMES=''
CURLTRUSTSERVERNAMES=''

CURLOUTPUTDIR=''
WGETOUTPUTDIR=''
WGETNODIRECTORIES=''
WGETCONTINUE=''
CURLCONTINUE=''
WGETTIMEOUT=''
CURLMAXTIME=''
WGETREADTIMEOUT=''
WGETRETRYCONNREFUSED=''
CURLRETRYCONNREFUSED=''
WGETTRIES='--tries 1'
CURLRETRY=''
WGETLOADCOOKIES=''
CURLCOOKIE=''

LISTONLY=''
CHECKURL=''
CHECKSITE=''
GETRESPONSE=''
GETFILENAME=''
GETREALURL=''
GETIPFSCID=''
LATEST=''
SECONDLATEST=''
CHECKMIRRORS=''
TARGETFILE=''
FORCEIPV=''


set_quiet()
{
    WGETQ='-q'
    CURLQ='-s'
    AXELQ='-q'
    quiet=1
}


eget_help()
{
cat <<EOF

eget - wget-like downloader wrapper with wildcard support in the filename part of the URL
Usage: eget [options] http://somesite.ru/dir/na*.log

Options:
    -q|--quiet                - quiet mode
    --verbose                 - verbose mode
    -k|--no-check-certificate - skip SSL certificate verification
    -H|--header               - use <header> (X-Cache:1 for example)
    -U|-A|--user-agent        - send a browser-like UserAgent
    --compressed              - request a compressed response and automatically decompress the content
    -4|--ipv4|--inet4-only    - use only IPv4
    -6|--ipv6|--inet6-only    - use only IPv6
    -O-|-O -                  - output downloaded file to stdout
    -O file                   - download to this file
    -P|--output-dir           - download to this directory

    -nd|--no-directories      - do not create a hierarchy of directories when retrieving recursively
    -c|--continue             - continue getting a partially-downloaded file
    -T|--timeout=N            - set the network timeout to N seconds
    --read-timeout=N          - set the read (and write) timeout to N seconds
    --retry-connrefused       - consider "connection refused" a transient error and try again
    -t|--tries                - set the number of tries. Specify 0 or 'inf' for infinite retrying
    --load-cookies file       - load cookies from file before the first HTTP retrieval
    --latest                  - print only the latest version of a file
    --second-latest           - print only the second to latest version of a file
    --check-mirrors           - check mirrors if the url is not accessible
    --trust-server-names      - use the name specified by the redirection

    --list|--list-only        - print only URLs
    --check-url URL           - check if the URL exists (returns HTTP 200 OK)
    --check-site URL          - check if the site is accessible (returns HTTP 200 OK or 404 Not Found)
    --get-response URL        - get response with all headers (even if HEAD is not acceptable)
    --get-filename URL        - print filename for the URL (via Content-Disposition if applicable)
    --get-real-url URL        - print URL after all redirects
    --get-ipfs-cid URL        - print CID for URL (after all redirects)

Supported URLs:
  ftp:// http:// https:// file:/ ipfs://

Supported backends (set like EGET_BACKEND=curl)
  wget curl (todo: aria2c)

Examples:
  $ eget http://ftp.somesite.ru/package-*.x64.tar
  $ eget http://ftp.somesite.ru/package *.tar
  $ eget https://github.com/owner/project package*.ext
  $ eget -O myname ipfs://QmVRUjnsnxHWkjq91KreCpUk4D9oZEbMwNQ3rzdjwND5dR
  $ eget --list http://ftp.somesite.ru/package-*.tar
  $ eget --check-url http://ftp.somesite.ru/test
  $ eget --list http://download.somesite.ru 'package-*.tar.xz'
  $ eget --list --latest https://github.com/telegramdesktop/tdesktop/releases 'tsetup.*.tar.xz'

EOF
}

if [ -z "$1" ] ; then
    echo "eget - wget-like downloader wrapper with wildcard support, uses wget or curl as backend" >&2
    echo "Run $0 --help to get help" >&2
    exit 1
fi


while [ -n "$1" ] ; do

    argument="$(echo "$1" | cut -d= -f1)"
    argvalue="$(echo "$1" | cut -s -d= -f2)"
    case "$argument" in
        -h|--help)
            eget_help
            exit
            ;;
        -q|--quiet)
            set_quiet
            ;;
        --verbose)
            verbose="$1"
            ;;
        -k|--no-check-certificate)
            WGETNOSSLCHECK='--no-check-certificate'
            CURLNOSSLCHECK='-k'
            AXELNOSSLCHECK='--insecure'
            ;;
        -H|--header)
            # TODO: error if header value contains spaces
            if [ -z "$argvalue" ];then
                shift
                # drop the first space (bash's ${1/ /} is not POSIX sh)
                argvalue="$(echo "$1" | sed -e 's| ||')"
            fi
            WGETHEADER="--header=$argvalue"
            CURLHEADER="--header $argvalue"
            AXELHEADER="--header=$argvalue"
            ;;
        -P|--output-dir)
            shift
            CURLOUTPUTDIR="--create-dirs --output-dir $1"
            WGETOUTPUTDIR="-P $1"
            ;;
        -U|-A|--user-agent)
            user_agent="Mozilla/5.0 (X11; Linux $arch) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
            WGETUSERAGENT="-U '$user_agent'"
            CURLUSERAGENT="-A '$user_agent'"
            AXELUSERAGENT="--user-agent='$user_agent'"
            ;;
        --compressed)
            CURLCOMPRESSED='--compressed'
            WGETCOMPRESSED='--compression=auto'
            ;;
        -4|--ipv4|--inet4-only)
            FORCEIPV="-4"
            ;;
        -6|--ipv6|--inet6-only)
            FORCEIPV="-6"
            ;;
        --list|--list-only)
            LISTONLY="$1"
            set_quiet
            ;;
        --check-url)
            CHECKURL="$1"
            #set_quiet
            ;;
        --check-site|--check)
            CHECKSITE="$1"
            #set_quiet
            ;;
        --get-filename)
            GETFILENAME="$1"
            ;;
        --get-response)
            GETRESPONSE="$1"
            ;;
        --get-real-url)
            GETREALURL="$1"
            ;;
        --get-ipfs-cid)
            GETIPFSCID="$1"
            ;;
        --latest)
            LATEST="$1"
            ;;
        --second-latest)
            SECONDLATEST="$1"
            ;;
        --check-mirrors)
            CHECKMIRRORS="$1"
            ;;
        -O)
            shift
            TARGETFILE="$1"
            ;;
        -O-)
            TARGETFILE="-"
            ;;
        -nd|--no-directories)
            WGETNODIRECTORIES="$1"
            ;;
        -c|--continue)
            WGETCONTINUE="$1"
            CURLCONTINUE="-C -"
            ;;
        -T|--timeout)
            if [ -z "$argvalue" ];then
                shift
                argvalue="$1"
            fi
            WGETTIMEOUT="--timeout $argvalue"
            CURLMAXTIME="--max-time $argvalue"
            ;;
        --read-timeout)
            if [ -z "$argvalue" ];then
                shift
                argvalue="$1"
            fi
            WGETREADTIMEOUT="--read-timeout $argvalue"
            if [ -z "$CURLMAXTIME" ];then
                CURLMAXTIME="--max-time $argvalue"
            fi
            ;;
        --retry-connrefused)
            WGETRETRYCONNREFUSED="$1"
            CURLRETRYCONNREFUSED="$1"
            ;;
        --trust-server-names)
            WGETRUSTSERVERNAMES="--trust-server-names"
            CURLTRUSTSERVERNAMES="-w '%{url_effective}'"
            ;;
        -t|--tries)
            if [ -z "$argvalue" ];then
                shift
                argvalue="$1"
            fi

            case "$argvalue" in
                0|inf)
                    CURLRETRY="--retry 1000"
                    WGETTRIES="--tries $argvalue"
                    ;;

                *)
                    WGETTRIES="--tries $argvalue"
                    CURLRETRY="--retry $(($argvalue-1))"
                    ;;
            esac
            ;;
        --load-cookies)
            shift;
            WGETLOADCOOKIES="--load-cookies $1"
            CURLCOOKIE="--cookie $1"
            ;;
        -*)
            fatal "Unknown option '$1', check eget --help."
            ;;
        *)
            break
            ;;
    esac
    shift
done
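
# Both --opt=value and --opt value forms are parsed above, e.g. (hypothetical):
#   eget -T 10 --tries=3 -O out.tar http://example.com/pkg.tar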


###############################
# defaults

# https://github.com/ipfs/kubo/issues/5541
ipfs_diag_timeout='--timeout 20s'

ipfs_api_local="/ip4/127.0.0.1/tcp/5001"
[ -n "$EGET_IPFS_API" ] && ipfs_api_local="$EGET_IPFS_API"

ipfs_api_brave="/ip4/127.0.0.1/tcp/45005"

# Public IPFS http gateways
ipfs_gateways="https://dhash.ru/ipfs https://ipfs.io/ipfs https://gateway.pinata.cloud/ipfs https://dweb.link/ipfs"

# Test data: https://etersoft.ru/templates/etersoft/images/logo.png
ipfs_checkQm="QmYwf2GAMvHxfFiUFL2Mr6KUG6QrDiupqGc8ms785ktaYw"

get_ipfs_brave()
{
    local ipfs_brave="$(ls ~/.config/BraveSoftware/Brave-Browser/*/*/go-ipfs_* 2>/dev/null | sort | tail -n1)"
    [ -n "$ipfs_brave" ] && [ -x "$ipfs_brave" ] || return
    echo "$ipfs_brave"
}

ipfs_api_access()
{
    [ -n "$IPFS_CMD" ] || fatal "IPFS is disabled"
    if [ -n "$verbose" ] ; then
         verdocmd $IPFS_CMD --api $IPFS_API $ipfs_diag_timeout diag sys >/dev/null
    else
         verdocmd $IPFS_CMD --api $IPFS_API $ipfs_diag_timeout diag sys >/dev/null 2>/dev/null
    fi
}

ipfs_check()
{
    [ -n "$IPFS_CMD" ] || fatal "IPFS is disabled"
    verdocmd $IPFS_CMD --api $IPFS_API $ipfs_diag_timeout cat "$1" >/dev/null
}

check_ipfs_gateway()
{
    local ipfs_gateway="$1"
    # TODO: check checksum
    if docmd eget --check-url "$ipfs_gateway/$ipfs_checkQm" ; then
        ipfs_mode="gateway"
        return
    fi

    if docmd eget --check-site "$(dirname $ipfs_gateway)" ; then
       info "IPFS gateway $ipfs_gateway is accessible, but can't return shared $ipfs_checkQm"
    else
       info "IPFS gateway $(dirname $ipfs_gateway) is not accessible"
    fi

    return 1
}

select_ipfs_gateway()
{

    IPFS_GATEWAY=''

    # if an http gateway is set, use only it
    if [ -n "$EGET_IPFS_GATEWAY" ] ; then
        check_ipfs_gateway "$EGET_IPFS_GATEWAY" && IPFS_GATEWAY="$EGET_IPFS_GATEWAY" || ipfs_mode="disabled"
        return
    fi

    # check public http gateways
    for ipfs_gateway in $ipfs_gateways ; do
        check_ipfs_gateway $ipfs_gateway || continue
        IPFS_GATEWAY="$ipfs_gateway"
        return
    done

    ipfs_mode="disabled"
    return 1
}

select_ipfs_mode()
{
    IPFS_CMD="$(print_command_path ipfs)"
    if [ -n "$IPFS_CMD" ] ; then
        IPFS_API="$ipfs_api_local"
        if ipfs_api_access ; then
            ipfs_mode="local" && return
            #if ipfs_check "$ipfs_checkQm" ; then
            #    ipfs_mode="local" && return
            #else
            #    info "Skipped local: it is accessible via $IPFS_CMD --api $IPFS_API, but can't return shared $ipfs_checkQm"
            #fi
        fi
    fi

    # disabled: a browser is not for mass downloading
    IPFS_CMD="$(get_ipfs_brave)"
    # if no EGET_IPFS_API, check brave
    if false && [ -z "$EGET_IPFS_API" ] && [ -n "$IPFS_CMD" ] ; then
        IPFS_API="$ipfs_api_brave"
        if ipfs_api_access ; then
            ipfs_mode="brave" && return
            #if ipfs_check "$ipfs_checkQm" ; then
            #    ipfs_mode="brave" && return
            #else
            #    info "Skipped Brave: it is accessible via $IPFS_CMD --api $IPFS_API, but can't return shared $ipfs_checkQm"
            #fi
        fi
    fi

    select_ipfs_gateway
}


# Functions for working with the eget IPFS DB
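# The DB is a plain text file with one record per line, as written by
# put_cid_and_url below:
#   <URL> <CID> <filename>
# e.g. (hypothetical):
#   https://example.com/pkg.tar QmYwf2GAMvHxfFiUFL2Mr6KUG6QrDiupqGc8ms785ktaYw pkg.tar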
get_cid_by_url()
{
    local URL="$1"
    [ -r "$EGET_IPFS_DB" ] || return
    is_fileurl "$URL" && return 1
    tac "$EGET_IPFS_DB" | grep -F "$URL Qm" | cut -f2 -d" " | grep -E "Qm[[:alnum:]]{44}" | head -n1
}

put_cid_and_url()
{
    local URL="$1"
    local CID="$2"
    local FN="$3"
    [ -w "$EGET_IPFS_DB" ] || return

    is_fileurl "$URL" && return

    local ac="$(get_url_by_cid "$CID")"
    if [ "$ac" = "$URL" ] ; then
        info "CID $CID already exists with the same URL $URL in IPFS DB, skipping"
        return
    fi
    echo "$URL $CID $FN" >> "$EGET_IPFS_DB"
    info "Placed in $EGET_IPFS_DB: $URL $CID $FN"
}

get_filename_by_cid()
{
    local CID="$1"
    [ -z "$EGET_IPFS_DB" ] && basename "$CID" && return
    tac "$EGET_IPFS_DB" | grep -F " $CID " | head -n1 | cut -f3 -d" "
}

get_url_by_cid()
{
    local CID="$1"
    [ -z "$EGET_IPFS_DB" ] && echo "$CID" && return
    tac "$EGET_IPFS_DB" | grep -F " $CID " | head -n1 | cut -f1 -d" "
}

###################


ipfs_mode="$EGET_IPFS"

# enable auto mode when set $EGET_IPFS_DB
[ -z "$ipfs_mode" ] && [ -n "$EGET_IPFS_DB" ] && ipfs_mode="auto"

if [ -n "$LISTONLY$CHECKURL$CHECKSITE" ] ; then
    ipfs_mode=""
    EGET_IPFS_DB=''
fi


if [ -n "$ipfs_mode" ] && [ -n "$EGET_IPFS_DB" ] ; then
    ddb="$(dirname "$EGET_IPFS_DB")"
    if [ -d "$ddb" ] ; then
        info "Using eget IPFS db $EGET_IPFS_DB"
        [ -r "$EGET_IPFS_DB" ] || touch "$EGET_IPFS_DB"
    else
        EGET_IPFS_DB=''
    fi
fi


# detect if we run with ipfs:// or with auto
if is_ipfsurl "$1" && [ -z "$ipfs_mode" ] || [ "$ipfs_mode" = "auto" ] ; then
    info "Autodetecting available IPFS relay..."
    select_ipfs_mode
    info "Auto selected IPFS mode: $ipfs_mode"
    [ "$ipfs_mode" = "gateway" ] && info "Since the local ipfs service is not accessible, the http gateway will be used."
else
    [ "$ipfs_mode" = "gateway" ] && select_ipfs_gateway
    [ -n "$ipfs_mode" ] && info "IPFS mode: $ipfs_mode"
fi

IPFS_CMD=''

if [ "$ipfs_mode" = "disabled" ] ; then

ipfs_get()
{
    fatal "IPFS is disabled"
}

ipfs_put()
{
    fatal "IPFS is disabled"
}

ipfs_cat()
{
    fatal "IPFS is disabled"
}


elif [ "$ipfs_mode" = "brave" ] ; then
    IPFS_CMD="$(get_ipfs_brave)" || fatal "Can't find ipfs command in Brave"
    IPFS_PRETTY_CMD="~Brave-Browser/$(basename $IPFS_CMD)"
    IPFS_API="$ipfs_api_brave"
    ipfs_api_access || fatal "Can't access Brave IPFS API (Brave browser is not running or IPFS is not activated?)"
    info "Will use $IPFS_PRETTY_CMD --api $IPFS_API"

elif [ "$ipfs_mode" = "local" ] ; then
    IPFS_CMD="$(print_command_path ipfs)" || fatal "Can't find ipfs command"
    IPFS_PRETTY_CMD="$IPFS_CMD"
    IPFS_API="$ipfs_api_local"
    ipfs_api_access || fatal "Can't access IPFS API (ipfs daemon is not running?)"
    info "Will use $IPFS_PRETTY_CMD --api $IPFS_API"

elif [ "$ipfs_mode" = "gateway" ] ; then
    info "Will use eget $IPFS_GATEWAY/HASH"

ipfs_get_real_url()
{
    [ -n "$IPFS_GATEWAY" ] || fatal "ipfs http gateway is not set"
    echo "$IPFS_GATEWAY/$1"
}

ipfs_get()
{
    if [ -n "$2" ] ; then
        docmd eget -O "$2" "$(ipfs_get_real_url "$1")"
    else
        docmd eget "$(ipfs_get_real_url "$1")"
    fi
}

ipfs_cat()
{
    # FIXME:
    ipfs_get "$1" "-"
}

ipfs_put()
{
    info "IPFS put skipped when a gateway is used"
    return 1
}
elif [ -z "$ipfs_mode" ] ; then
    :
else
    fatal "Unsupported eget ipfs mode $ipfs_mode"
fi

if [ -n "$IPFS_CMD" ] ; then

ipfs_get_real_url()
{
    return 1
}

ipfs_get()
{
    [ -n "$IPFS_CMD" ] || fatal "ipfs api is not usable"
    if [ -n "$2" ] ; then
        showcmd $IPFS_PRETTY_CMD --api $IPFS_API get -o "$2" "$1"
        $IPFS_CMD --api $IPFS_API get -o "$2" "$1"
    else
        showcmd $IPFS_PRETTY_CMD --api $IPFS_API get "$1"
        $IPFS_CMD --api $IPFS_API get "$1"
    fi
}

ipfs_put()
{
    [ -n "$IPFS_CMD" ] || fatal "ipfs api is not usable"

    # detect if -q is used (outputs a bare Qm instead of "added Qm" lines)
    local qu="$1"
    [ "$qu" = "-q" ] || qu=''

    showcmd $IPFS_PRETTY_CMD --api $IPFS_API add "$@"

    local res
    res="$($IPFS_CMD --api $IPFS_API add "$@")" || return

    if [ -z "$qu" ] ; then
        res="$(echo "$res" | grep "^added Qm")" || return
        res="$(echo "$res" | cut -f2 -d" ")"
    fi

    is_ipfs_hash "$res" && echo "$res" && return
    fatal "Can't recognize $res IPFS hash"
}

ipfs_cat()
{
    [ -n "$IPFS_CMD" ] || fatal "ipfs api is not usable"
    showcmd $IPFS_PRETTY_CMD --api $IPFS_API cat "$1"
    $IPFS_CMD --api $IPFS_API cat "$1"
}

fi
###############################



WGET="$(print_command_path wget)"
CURL="$(print_command_path curl)"

ORIG_EGET_BACKEND="$EGET_BACKEND"

# override backend
if is_fileurl "$1" ; then
    EGET_BACKEND="file"
elif is_ipfsurl "$1" ; then
    EGET_BACKEND="ipfs"
fi

orig_EGET_BACKEND="$EGET_BACKEND"
EGET_BACKEND="$(basename "$EGET_BACKEND")"

case "$orig_EGET_BACKEND" in
    file|ipfs)
        ;;
    */wget)
        WGET="$orig_EGET_BACKEND"
        [ -x "$WGET" ] || fatal "There is no $orig_EGET_BACKEND in the system but you forced using it via EGET_BACKEND. Install it with $ epm install wget"
        ;;
    wget)
        [ -n "$WGET" ] || fatal "There is no wget in the system but you forced using it via EGET_BACKEND. Install it with $ epm install wget"
        ;;
    */curl)
        CURL="$orig_EGET_BACKEND"
        [ -x "$CURL" ] || fatal "There is no $orig_EGET_BACKEND in the system but you forced using it via EGET_BACKEND. Install it with $ epm install curl"
        ;;
    curl)
        [ -n "$CURL" ] || fatal "There is no curl in the system but you forced using it via EGET_BACKEND. Install it with $ epm install curl"
        ;;
    '')
        [ -n "$WGET" ] && EGET_BACKEND="wget"
        [ -z "$EGET_BACKEND" ] && [ -n "$CURL" ] && EGET_BACKEND="curl"
        [ -n "$EGET_BACKEND" ] || fatal "There is neither wget nor curl in the system. Install one with $ epm install wget"
        ;;
    *)
        fatal "Unknown EGET_BACKEND $EGET_BACKEND"
        ;;
esac
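
# Note: EGET_BACKEND may also be a full path; its basename selects the handler
# above and the path overrides the binary, e.g. (hypothetical):
#   EGET_BACKEND=/usr/local/bin/curl eget http://example.com/file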



if [ "$EGET_BACKEND" = "file" ] ; then

# put remote content to stdout
url_scat()
{
    local URL="$1"
    cat "$(path_from_url "$URL")"
}
# download to default name or to $2
url_sget()
{
    local URL="$1"
    if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
       scat "$URL"
       return
    elif [ -n "$2" ] ; then
       cp -av "$(path_from_url "$URL")" "$2"
       return
    fi
    cp -av "$(path_from_url "$URL")" .
}

url_check_accessible()
{
    local URL="$1"
    test -f "$(path_from_url "$URL")"
}

url_check_available()
{
    local URL="$1"
    test -f "$(path_from_url "$URL")"
}

url_get_filename()
{
    basename "$1"
}

url_get_real_url()
{
    echo "$1"
}

elif [ "$EGET_BACKEND" = "ipfs" ] ; then

# put remote content to stdout
url_scat()
{
    local URL="$1"
    ipfs_cat "$(cid_from_url "$URL")"
}
# download to default name or to $2
url_sget()
{
    local URL="$1"
    if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
       scat "$URL"
       return
    elif [ -n "$2" ] ; then
       ipfs_get "$(cid_from_url "$URL")" "$2"
       return
    fi

    local fn="$(url_print_filename_from_url "$URL")"
    if [ -n "$fn" ] ; then
       ipfs_get "$(cid_from_url "$URL")" "$fn"
       return
    fi

    ipfs_get "$(cid_from_url "$URL")"
}

url_check_accessible()
{
    local URL="$1"
    # TODO: improve me
    scat "$URL" >/dev/null
}

url_check_available()
{
    local URL="$1"
    # TODO: improve me
    scat "$URL" >/dev/null
}

url_print_filename_from_url()
{
    local URL="$1"
    local fn="$(echo "$URL" | sed -e 's|ipfs://.*\?filename=||')"
    [ "$URL" != "$fn" ] && echo "$fn" && return
}

url_get_filename()
{
    local URL="$1"
    url_print_filename_from_url "$URL" && return
    local CID="$(cid_from_url "$URL")"
    get_filename_by_cid "$CID"
}

url_get_real_url()
{
    local URL="$1"
    local CID="$(cid_from_url "$URL")"
    # if we use gateway, return URL with gateway
    ipfs_get_real_url "$URL" && return
    get_url_by_cid "$CID"
}


elif [ "$EGET_BACKEND" = "wget" ] ; then
__wget()
{
    if [ -n "$WGETUSERAGENT" ] ; then
        docmd $WGET $FORCEIPV $WGETQ $WGETCOMPRESSED $WGETHEADER $WGETOUTPUTDIR $WGETNOSSLCHECK $WGETNODIRECTORIES $WGETCONTINUE $WGETTIMEOUT $WGETREADTIMEOUT $WGETRETRYCONNREFUSED $WGETTRIES $WGETLOADCOOKIES $WGETRUSTSERVERNAMES "$WGETUSERAGENT" "$@"
    else
        docmd $WGET $FORCEIPV $WGETQ $WGETCOMPRESSED $WGETHEADER $WGETOUTPUTDIR $WGETNOSSLCHECK $WGETNODIRECTORIES $WGETCONTINUE $WGETTIMEOUT $WGETREADTIMEOUT $WGETRETRYCONNREFUSED $WGETTRIES $WGETLOADCOOKIES $WGETRUSTSERVERNAMES "$@"
    fi
}

# put remote content to stdout
url_scat()
{
    local URL="$1"
    download_with_mirroring __wget "$URL" -O-
}
# download to default name or to $2
url_sget()
{
    local URL="$1"
    if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
       scat "$URL"
       return
    elif [ -n "$2" ] ; then
       download_with_mirroring __wget "$URL" -O "$2"
       return
    fi
# TODO: rsync support for known hosts?
# Do not download if the size and date are the same
# -nc
# TODO: overwrite always
    download_with_mirroring __wget "$URL" $WGETNAMEOPTIONS
}

url_get_response()
{
    local URL="$1"
    local answer
    answer="$(quiet=1 __wget --timeout 20 --tries 1 --spider -S "$URL" 2>&1)"
    # HTTP/1.1 405 Method Not Allowed
    # HTTP/1.1 404 Not Found
    if echo "$answer" | grep -q "^ *HTTP/[12.]* 40[45]" ; then
        (quiet=1 __wget -O/dev/null --header="Range: bytes=0-0" -S "$URL" 2>&1)
        return
    fi
    echo "$answer"
}
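
# Note on the fallback above: some servers reject HEAD (405) or hide a file
# from it (404), so the response is re-fetched with a ranged GET
# (Range: bytes=0-0) to obtain the real headers.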


elif [ "$EGET_BACKEND" = "curl" ] ; then

__curl()
{
    if [ -n "$CURLUSERAGENT" ] ; then
        docmd $CURL $FORCEIPV --fail -L $CURLQ $CURLCOMPRESSED $CURLHEADER $CURLOUTPUTDIR $CURLNOSSLCHECK $CURLCONTINUE $CURLMAXTIME $CURLRETRYCONNREFUSED $CURLRETRY $CURLCOOKIE $CURLTRUSTSERVERNAMES "$CURLUSERAGENT" "$@"
    else
        docmd $CURL $FORCEIPV --fail -L $CURLQ $CURLCOMPRESSED $CURLHEADER $CURLOUTPUTDIR $CURLNOSSLCHECK $CURLCONTINUE $CURLMAXTIME $CURLRETRYCONNREFUSED $CURLRETRY $CURLCOOKIE $CURLTRUSTSERVERNAMES "$@"
    fi
}
# put remote content to stdout
url_scat()
{
    local URL="$1"
    download_with_mirroring __curl "$URL" --output -
}
# download to default name or to $2
url_sget()
{
    local URL="$1"
    local res
    if [ "$2" = "/dev/stdout" ] || [ "$2" = "-" ] ; then
       scat "$1"
       return
    elif [ -n "$2" ] ; then
       download_with_mirroring __curl "$URL" --output "$2"
       return
    fi

    local FILENAME=$(url_get_filename "$URL")
    if [ -n "$FILENAME" ] ; then
        download_with_mirroring __curl "$URL" --remote-time --remote-header-name --output "$FILENAME"
        return
    fi

    download_with_mirroring __curl "$URL" $CURLNAMEOPTIONS
}

url_get_response()
{
    local URL="$1"
    local answer
    answer="$(quiet=1 __curl --max-time 20 --retry 0 -LI "$URL" 2>&1)"
    # HTTP/1.1 405 Method Not Allowed
    # HTTP/1.1 404 Not Found
    if echo "$answer" | grep -q "^ *HTTP/[12.]* 40[45]" ; then
        (quiet=1 __curl --max-time 20 --retry 0 -L -i -r0-0 "$URL" 2>&1)
        return
    fi
    echo "$answer"
}

else
    fatal "Unknown EGET_BACKEND '$EGET_BACKEND', logical error."
fi


# Common code for both wget and curl (http related)
if [ "$EGET_BACKEND" = "wget" ] || [ "$EGET_BACKEND" = "curl" ] ; then

url_get_headers()
{
    local URL="$1"
    url_get_response "$URL" | grep -i "^ *[[:alpha:]].*: " | sed -e 's|^ *||' -e 's|\r$||'
}

url_check_accessible()
{
    local URL="$1"
    url_get_response "$URL" | grep "HTTP/" | tail -n1 | grep -q -w "200\|404"
}

url_check_available()
{
    local URL="$1"
    url_get_response "$URL" | grep "HTTP/" | tail -n1 | grep -q -w "200"
}

url_get_header()
{
    local URL="$1"
    local HEADER="$2"
    url_get_headers "$URL" | grep -i "^ *$HEADER: " | sed -e "s|^ *$HEADER: ||i"
}
1191 1192 1193 1194 1195 1196 1197 1198 1199
url_get_real_url()
{
    local URL="$1"

    ! is_httpurl "$URL" && echo "$URL" && return

    # don't check location if we have made form of the URL
    [ -n "$MADEURL" ] && [ "$MADEURL" = "$URL" ] && echo "$URL" && return

1200
    local loc
1201
    for loc in $(url_get_header "$URL" "Location" | tac | sed -e 's| .*||') ; do
1202
        # hack for construct full url from related Location
1203
        if is_abs_path "$loc" ; then
1204
            loc="$(concatenate_url_and_filename "$(get_host_only "$URL")" "$loc")" #"
1205
        fi
1206 1207 1208 1209 1210
        if ! is_strange_url "$loc" ; then
            echo "$loc"
            return
        fi
    done
1211 1212 1213 1214

    echo "$URL"
}

url_get_filename()
{
    local URL="$1"

    ! is_httpurl "$URL" && basename "$URL" && return

    # See https://www.cpcwood.com/blog/5-aws-s3-utf-8-content-disposition
    # https://www.rfc-editor.org/rfc/rfc6266
    local cd="$(url_get_header "$URL" "Content-Disposition")"
    if echo "$cd" | grep -qi "filename\*= *UTF-8" ; then
        #Content-Disposition: attachment; filename="unityhub-amd64-3.3.0.deb"; filename*=UTF-8''"unityhub-amd64-3.3.0.deb"
        echo "$cd" | sed -e "s|.*filename\*= *UTF-8''||i" -e 's|^"||' -e 's|";$||' -e 's|"$||'
        return
    fi
    if echo "$cd" | grep -qi "filename=" ; then
        #Content-Disposition: attachment; filename=postman-linux-x64.tar.gz
        #content-disposition: attachment; filename="code-1.77.1-1680651749.el7.x86_64.rpm"
        echo "$cd" | sed -e 's|.*filename= *||i' -e 's|^"||' -e 's|";.*||' -e 's|"$||'
        return
    fi

    basename "$(url_get_real_url "$URL")"
}

fi

if [ -n "$ipfs_mode" ] && [ -n "$EGET_IPFS_DB" ] && ! is_ipfsurl "$1" ; then

download_to_ipfs()
{
    local URL="$1"
    local res
    #res="$(url_scat "$URL" | ipfs_put )" || return
    #res="$(echo "$res" | grep "^added Qm")" || return 1
    #CID="$(echo "$res" | cut -f2 -d" ")"
    # with -q to disable progress (mixed with download progress)
    res="$(url_scat "$URL" | ipfs_put -q)" || return
    is_ipfs_hash "$res" || return 1
    echo "$res"
}

# put remote content to stdout
scat()
{
    local URL="$1"
    url_scat "$URL"

    # It is a list-only function. Don't save to IPFS
    return

    ###################

    local CID="$(get_cid_by_url "$URL")"
    if [ -n "$CID" ] && [ -z "$EGET_IPFS_FORCE_LOAD" ] ; then
        info "$URL -> $CID"
        ipfs_cat "$CID"
        return
    fi

    CID="$(download_to_ipfs "$URL")" || return

    ipfs_cat "$CID" || return

    local FN="$(url_get_filename "$URL")" || return

    put_cid_and_url "$URL" "$CID" "$FN"
}

# download to default name or to $2
sget()
{
    local URL="$1"
    local TARGET="$2"

    if [ -n "$GETFILENAME" ] ; then
        get_filename "$URL"
        exit
    fi

    local REALURL="$(get_real_url "$URL")" || return

    if [ -n "$GETREALURL" ] ; then
        echo "$REALURL"
        exit
    fi

    # skip ipfs for cat
    if [ "$TARGET" = "/dev/stdout" ] || [ "$TARGET" = "-" ] ; then
       url_scat "$URL"
       return
    fi


    #if is_strange_url "$REALURL" ; then
    #    info "Just download strange URL $REALURL, skipping IPFS"
    #    url_sget "$REALURL" "$TARGET"
    #    return
    #fi

    local CID="$(get_cid_by_url "$REALURL")"
    if [ -n "$CID" ] && [ -z "$EGET_IPFS_FORCE_LOAD" ] ; then

        if [ -n "$GETIPFSCID" ] ; then
            echo "$CID"
            exit
        fi

        if [ -n "$GETFILENAME" ] ; then
            get_filename_by_cid "$CID"
            exit
        fi

        if [ -n "$GETREALURL" ] ; then
            get_url_by_cid "$CID"
            exit
        fi

        if [ -z "$TARGET" ] ; then
            # TODO: in some cases we can get name from URL...
            TARGET="$(get_filename_by_cid "$CID")"
            if [ -z "$TARGET" ] ; then
                TARGET="$CID"
            fi
        fi
        [ "$URL" = "$REALURL" ] && info "$URL -> $CID -> $TARGET" || info "$URL -> $REALURL -> $CID -> $TARGET"
        ipfs_get "$CID" "$TARGET" && return

        # failed to get from IPFS, fallback
        url_sget "$REALURL" "$TARGET"
        return
    fi


    # download and put to IPFS
    local FN="$(url_get_filename "$REALURL")" || return
    if [ -z "$TARGET" ] ; then
        TARGET="$FN"
    fi

    if [ -n "$GETIPFSCID" ] ; then
         # add to IPFS and print out CID (ipfs add needs a local file, so download via download_to_ipfs)
         CID="$(download_to_ipfs "$REALURL")" || return
         echo "$CID"
         exit
    fi

    # download file and add to IPFS
    url_sget "$REALURL" "$TARGET" || return

    # don't do ipfs put when a gateway is used
    [ "$ipfs_mode" = "gateway" ] && return

    CID="$(ipfs_put --progress "$TARGET")" || return

    put_cid_and_url "$REALURL" "$CID" "$FN"
}

check_url_is_available()
{
    local URL="$1"
    local REALURL="$(get_real_url "$URL")" || return
    local CID="$(get_cid_by_url "$REALURL")"
    if [ -n "$CID" ] ; then
        [ "$URL" = "$REALURL" ] && info "$URL -> $CID" || info "$URL -> $REALURL -> $CID"
        ipfs_check "$CID"
        return
    fi

    CID="$(download_to_ipfs "$REALURL")" || return

    local FN="$(url_get_filename "$REALURL")" || return
    ipfs_cat "$CID" >/dev/null || return
    put_cid_and_url "$REALURL" "$CID" "$FN"
}

check_url_is_accessible()
{
    check_url_is_available "$@"
}

get_filename()
{
    url_get_filename "$1"
}

get_real_url()
{
    url_get_real_url "$1"
}

else
scat()
{
    url_scat "$@"
}

sget()
{
    if [ -n "$GETFILENAME" ] ; then
        get_filename "$1"
        exit
    fi

    if [ -n "$GETREALURL" ] ; then
        get_real_url "$1"
        exit
    fi

    url_sget "$@"
}

check_url_is_accessible()
{
    url_check_accessible "$@"
}

check_url_is_available()
{
    url_check_available "$@"
}

get_filename()
{
    url_get_filename "$1"
}

get_real_url()
{
    url_get_real_url "$1"
}

fi


get_github_urls()
{
    # https://github.com/OWNER/PROJECT
    local owner="$(echo "$1" | sed -e "s|^https://github.com/||" -e "s|/.*||")" #"
    local project="$(echo "$1" | sed -e "s|^https://github.com/$owner/||" -e "s|/.*||")" #"
    [ -n "$owner" ] || fatal "Can't get owner from $1"
    [ -n "$project" ] || fatal "Can't get project from $1"
    local URL="https://api.github.com/repos/$owner/$project/releases"
    # api sometimes returns unformatted json
    scat $URL | sed -e 's|,\(["{]\)|,\n\1|g' | \
        grep -i -o -E '"browser_download_url": *"https://.*"' | cut -d'"' -f4
}
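
# e.g. (hypothetical) get_github_urls "https://github.com/owner/project"
# prints one browser_download_url per release asset, ready for filter_glob.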

# drop file path from URL
get_host_only()
{
    echo "$1/" | grep -Eo '(.*://[^/]+)'
}
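
# e.g. (hypothetical): get_host_only "https://host.example/dir/file" prints https://host.example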

concatenate_url_and_filename()
{
    local url="$(echo "$1" | sed -e 's|/*$||' )"
    local fn="$(echo "$2" | sed -e 's|^/*||' )"
    echo "$url/$fn"
}
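
# e.g. (hypothetical): concatenate_url_and_filename "http://host/dir/" "/pkg.tar"
# prints http://host/dir/pkg.tar (extra slashes at the join are collapsed)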

# MADEURL holds the latest constructed URL, as a flag that it is the final form of the URL
MADEURL=''

# Args: URL filename
make_fileurl()
{
    local url="$1"
    local fn="$2"

    fn="$(echo "$fn" | sed -e 's|^./||' -e 's|^/+||')"

    if is_fileurl "$url" ; then
        # if it is a file url, use it as is
        :
    elif is_abs_path "$fn" ; then
        # if there is a file path from the root of the site
        url="$(get_host_only "$url")"
    elif ! have_end_slash_or_php_parametr "$url" ; then
        url="$(dirname "$url")"
    fi

    MADEURL="$(concatenate_url_and_filename "$url" "$fn")"
    echo "$MADEURL"
}
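
# Illustrative examples (hypothetical URLs):
#   make_fileurl "http://host/dir/index.html" "pkg.tar"  -> http://host/dir/pkg.tar
#   make_fileurl "http://host/dir/" "/pub/pkg.tar"       -> http://host/pub/pkg.tar
# (the second keeps the host only, since the filename is a path from the site root)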

get_urls()
{
    if is_fileurl "$URL" ; then
        ls -1 "$(path_from_url "$URL")"
        return
    fi

    # Hack: Converted markdown support
    # https://github.com/dotnet/core/blob/main/release-notes/9.0/preview/rc1/9.0.0-rc.1.md
    if false && echo "$URL" | grep -q "\.md$" ; then
        scat $URL | sed -e 's|<|<\n|g' | grep "https*" | sed -e 's|.*\(https*://\)|\1|' -e 's|".*||g'
        return
    fi

    # cat html, split it into lines at tags and extract hrefs only
    scat $URL | sed -e 's|<|<\n|g' -e 's|data-file=|href=|g' -e "s|href=http|href=\"http|g" -e "s|>|\">|g" -e "s|'|\"|g" | \
         grep -i -o -E 'href="(.+)"' | sed -e 's|&amp;|\&|' | cut -d'"' -f2
}


if [ -n "$CHECKURL" ] ; then
    #set_quiet
    URL="$1"
    check_url_is_available "$URL"
    res=$?
    if [ -n "$verbose" ] ; then
        [ "$res" = "0" ] && echo "$URL is accessible via network and file exists" || echo "$URL is NOT accessible via network or file does not exist"
    fi
    exit $res
fi

if [ -n "$CHECKSITE" ] ; then
    #set_quiet
    URL="$1"
    check_url_is_accessible "$URL"
    res=$?
    if [ -n "$verbose" ] ; then
        [ "$res" = "0" ] && echo "$URL is accessible via network" || echo "$URL is NOT accessible via network"
    fi
    exit $res
fi

if [ -n "$GETRESPONSE" ] ; then
    url_get_response "$1"
    exit
fi


# separate part for github downloads
if echo "$1" | grep -q "^https://github.com/" && \
   echo "$1" | grep -q -v "/blob/" && echo "$1" | grep -q -v "/download/" && [ -n "$2" ] ; then
    MASK="$2"

    if [ -n "$LISTONLY" ] ; then
        get_github_urls "$1" | filter_glob "$MASK" | filter_order
        exit
    fi

    ERROR=0
    for fn in $(get_github_urls "$1" | filter_glob "$MASK" | filter_order) ; do
        MADEURL="$fn" # mark it as the final form of the URL
        sget "$fn" "$TARGETFILE" || ERROR=1
        [ -n "$TARGETFILE" ] && [ "$ERROR" = "0" ] && break
    done
    exit
fi

if is_ipfsurl "$1" ; then
    [ -n "$2" ] && fatal "too many args when ipfs://Qm... used: extra '$2' arg"
    sget "$1" "$TARGETFILE"
    exit
fi

# if mask is the second arg
if [ -n "$2" ] ; then
    URL="$1"
    MASK="$2"
    SEPMASK="$2"
else
    if have_end_slash_or_php_parametr "$1" ; then
        URL="$1"
        MASK=""
    else
        # drop mask part
        URL="$(dirname "$1")/"
        # wildcards allowed only in the last part of path
        MASK=$(basename "$1")
    fi

fi

# https://www.freeoffice.com/download.php?filename=freeoffice-2021-1062.x86_64.rpm
if echo "$URL" | grep -q "[*\[\]]" ; then
    fatal "Error: there is a globbing symbol (*[]) in $URL. It is allowed only in the mask part"
fi

is_url "$MASK" && fatal "eget supports only one URL as argument"
[ -n "$3" ] && fatal "too many args: extra '$3'. Maybe you need to use quotes for the arg with wildcards."

# TODO: curl?
# If ftp protocol, just download
if echo "$URL" | grep -q "^ftp://" ; then
    [ -n "$LISTONLY" ] && fatal "TODO: list files for ftp:// is not supported yet"
    sget "$1" "$TARGETFILE"
    exit
fi


if [ -n "$LISTONLY" ] ; then
    for fn in $(get_urls | filter_glob "$MASK" | filter_order) ; do
        is_url "$fn" && echo "$fn" && continue
        make_fileurl "$URL" "$fn"
    done
    exit
fi

is_wildcard()
{
    echo "$1" | grep -q "[*?]" && return
    echo "$1" | grep -q "\]" && return
    echo "$1" | grep -q "\[" && return
}

# If there is no wildcard symbol like asterisk, just download
if [ -z "$SEPMASK" ] && ! is_wildcard "$MASK" || echo "$MASK" | grep -q "[?].*="; then
    sget "$1" "$TARGETFILE"
    exit
fi

ERROR=0
for fn in $(get_urls | filter_glob "$MASK" | filter_order) ; do
    is_url "$fn" || fn="$(make_fileurl "$URL" "$fn" )" #"
    sget "$fn" "$TARGETFILE" || ERROR=1
    [ -n "$TARGETFILE" ] && [ "$ERROR" = "0" ] && break
done
exit $ERROR