#!/bin/bash
# 2009-2010, 2012, 2017, 2020 Etersoft www.etersoft.ru
# Author: Vitaly Lipatov <lav@etersoft.ru>
# Public domain

# TODO: rewrite with shell commands, perl or C
# Python - http://www.linuxtopia.org/online_books/programming_books/python_programming/python_ch16s03.html
# Shell  - http://linux.byexamples.com/archives/127/uniq-and-basic-set-theory/
#        - http://maiaco.com/articles/shellSetOperations.php
# Perl   - http://docstore.mik.ua/orelly/perl/cookbook/ch04_09.htm
#        - http://blogs.perl.org/users/polettix/2012/03/sets-operations.html
# http://rosettacode.org/wiki/Symmetric_difference
# TODO: add unit tests
# http://ru.wikipedia.org/wiki/Операции_над_множествами

# Base set operations:
# * union
#   "1 2 3" "3 4 5" -> "1 2 3 4 5"
# * intersection
#   "1 2 3" "3 4 5" -> "3"
# * relative complement (subtracted, difference) ( A \ B – members in A but not in B )
# http://en.wikipedia.org/wiki/Complement_%28set_theory%29
#   "1 3" "1 2 3 4" -> "2 4"
# * symmetric difference (симметричная разность) ( A ^ B – members in A or B but not both )
# http://en.wikipedia.org/wiki/Symmetric_difference
#   "1 2 3" "3 4 5" -> "1 2 4 5"

# Report a fatal error: write "FATAL: <arguments>" to stderr and terminate
# the script with exit status 1.
fatal()
{
        printf 'FATAL: %s\n' "$*" 1>&2
        exit 1
}

# Filter: reads text from stdin and, on every line, collapses each run of
# spaces into a single space and then removes the leading and the trailing
# space. Only the space character is handled (tabs are left untouched).
filter_strip_spaces()
{
        # "  *" (a space followed by zero or more spaces) is the portable
        # basic-regex spelling of " \+", which is a GNU extension.
        # A possible alternative is just: xargs echo
        sed -e 's|  *| |g' -e 's|^ ||' -e 's| $||'
}

# Prints the arguments (joined with "$*") after passing them through
# filter_strip_spaces, i.e. with extra spaces removed.
strip_spaces()
{
        # printf instead of echo: echo would treat a lone "-n" or "-e"
        # argument as an option and print nothing.
        printf '%s\n' "$*" | filter_strip_spaces
}

# Succeeds (status 0) when the arguments, after strip_spaces, are an empty
# string, i.e. they contain nothing but spaces or nothing at all.
is_empty()
{
        [ -z "$(strip_spaces "$*")" ]
}

# Alternative spelling of is_empty; forwards all arguments unchanged and
# returns is_empty's status.
isempty()
{
        is_empty "$@"
}

# Succeeds (status 0) when the arguments, joined with "$*", contain at least
# one space character; returns 1 otherwise.
has_space()
{
        # A case pattern works in dash as well and, unlike echo | sed,
        # cannot be confused by arguments such as "-n".
        case "$*" in
                *" "*) return 0 ;;
        esac
        return 1
}

# Prints every whitespace-separated word of the arguments on its own line.
list()
{
        local i
        # noglob: $@ is deliberately unquoted to split it into words, but
        # wildcard characters in the words must stay literal.
        set -f
        for i in $@ ; do
                # printf instead of echo so that words like "-n" are printed as is
                printf '%s\n' "$i"
        done
        set +f
}

# Prints the number of whitespace-separated words in the arguments.
count()
{
        # noglob keeps wildcard characters literal while the arguments are
        # re-split into individual words.
        set -f
        set -- $@
        set +f
        # The shell already knows the word count; no need for list | wc -l.
        echo "$#"
}

# Prints the words of the arguments sorted and without duplicates, on one
# line separated by single spaces. The order is whatever "sort -u" produces.
# Example: union "1 2 2 3 3" -> "1 2 3"
union()
{
        set -f
        # $(...) is left unquoted on purpose: it turns sort's one-word-per-line
        # output back into separate arguments for strip_spaces.
        strip_spaces $(list $@ | sort -u)
        set +f
}

# Args: LIST1 LIST2
# Prints the words of LIST2 that are also present in LIST1, in LIST2 order.
# Example: intersection "1 2 3" "3 4 5" -> "3"
intersection()
{
        local RES=""
        local i j
        local noglob_was_on=""
        # Remember the caller's noglob setting so it can be left as found.
        case "$-" in *f*) noglob_was_on=1 ;; esac
        # noglob: the lists are split unquoted; wildcards must stay literal.
        set -f
        for i in $2 ; do
            for j in $1 ; do
                if [ "$i" = "$j" ] ; then
                    RES="$RES $i"
                    # one hit is enough; repeated words in LIST1 must not
                    # repeat the result
                    break
                fi
            done
        done
        [ -n "$noglob_was_on" ] || set +f
        strip_spaces "$RES"
}

# Alias for union: prints the words sorted and without duplicates.
uniq()
{
        # Quoted so that wildcard characters reach union untouched; union
        # does its own word splitting.
        union "$@"
}

has()
{
	local wd="$1"
	shift
	echo "$*" | grep -q -- "$wd"
}

# Args: PATTERN STRING...
# Succeeds when the remaining arguments (joined with "$*") contain a match
# for PATTERN.
# Note: extended regular expressions (grep -E) are used: write
# '[0-9]+(first|two)', not '[0-9]\+...'
match()
{
        local wd="$1"
        shift
        # printf instead of echo: a string such as "-n" must be searched,
        # not interpreted as an echo option.
        printf '%s\n' "$*" | grep -E -q -- "$wd"
}


# Args: PATTERN LIST
# Prints the words of LIST that do not match the grep PATTERN as a whole
# (the pattern is anchored with ^ and $).
# Example: reg_remove "1." "11 12 21 22" -> "21 22"
reg_remove()
{
        local i
        local RES=
        # noglob: LIST is split unquoted; wildcards must stay literal.
        set -f
        for i in $2 ; do
                # printf instead of echo so that words like "-n" are tested as is
                printf '%s\n' "$i" | grep -q -- "^$1$" || RES="$RES $i"
        done
        set +f
        strip_spaces "$RES"
}

# Args: PATTERN LIST
# Prints the words of LIST that contain no whole-word match (grep -w) for
# PATTERN.
# Example: reg_wordremove "1." "11 12 21 22" -> "21 22"
reg_wordremove()
{
        local i
        local RES=""
        # noglob: LIST is split unquoted; wildcards must stay literal.
        set -f
        for i in $2 ; do
                # "--" keeps a pattern that starts with "-" from being
                # parsed as a grep option.
                printf '%s\n' "$i" | grep -q -w -- "$1" || RES="$RES $i"
        done
        set +f
        strip_spaces "$RES"
}

# Args: WORD LIST
# Prints the words of LIST that are not exactly equal to WORD (plain string
# comparison, no pattern matching).
reg_rqremove()
{
        local i
        local RES=""
        local noglob_was_on=""
        # Remember the caller's noglob setting so it can be left as found.
        case "$-" in *f*) noglob_was_on=1 ;; esac
        # noglob: LIST is split unquoted; wildcards must stay literal.
        set -f
        for i in $2 ; do
                [ "$i" = "$1" ] || RES="$RES $i"
        done
        [ -n "$noglob_was_on" ] || set +f
        strip_spaces "$RES"
}

# Args: LIST1 LIST2
# Prints the words of LIST2 that are not present in LIST1 (exact comparison).
# Example: exclude "1 3" "1 2 3 4" -> "2 4"
exclude()
{
        local i
        local RES="$2"
        # noglob: LIST1 is split unquoted; wildcards must stay literal.
        set -f
        for i in $1 ; do
                # drop one LIST1 word at a time from the running result
                RES="$(reg_rqremove "$i" "$RES")"
        done
        set +f
        strip_spaces "$RES"
}

# Args: PATTERNS LIST
# Prints the words of LIST that match none of the space-separated grep
# PATTERNS (each pattern has to match a whole word, see reg_remove).
# Example: reg_exclude "22 1." "11 12 21 22" -> "21"
reg_exclude()
{
        local i
        local RES="$2"
        # noglob: PATTERNS is split unquoted; characters such as * must
        # reach grep untouched.
        set -f
        for i in $1 ; do
                RES="$(reg_remove "$i" "$RES")"
        done
        set +f
        strip_spaces "$RES"
}

# Args: PATTERNS LIST
# Prints the words of LIST that contain no whole-word match (grep -w, see
# reg_wordremove) for any of the space-separated PATTERNS.
# Example: reg_wordexclude "wo.* er" "work were more else" -> "were more else"
reg_wordexclude()
{
        local i
        local RES="$2"
        # noglob: PATTERNS is split unquoted; characters such as * must
        # reach grep untouched.
        set -f
        for i in $1 ; do
                RES="$(reg_wordremove "$i" "$RES")"
        done
        set +f
        strip_spaces "$RES"
}

# Args: WORD LIST
# Returns 0 when LIST contains a word exactly equal to WORD, 1 otherwise.
# The caller's noglob setting is the same after the call as before it.
if_contain()
{
        local i
        local found=1
        local noglob_was_on=""
        case "$-" in *f*) noglob_was_on=1 ;; esac
        # noglob: LIST is split unquoted; wildcards must stay literal.
        set -f
        for i in $2 ; do
            if [ "$i" = "$1" ] ; then
                found=0
                # leave the loop instead of returning, so the noglob
                # setting is restored on this path too
                break
            fi
        done
        [ -n "$noglob_was_on" ] || set +f
        return $found
}

# Args: LIST1 LIST2
# Symmetric difference: prints the words of LIST1 that are missing from
# LIST2, followed by the words of LIST2 that are missing from LIST1.
# Example: difference "1 2 3" "3 4 5" -> "1 2 4 5"
difference()
{
        local RES=""
        local i
        local noglob_was_on=""
        # Remember the caller's noglob setting so it can be left as found.
        case "$-" in *f*) noglob_was_on=1 ;; esac
        set -f
        for i in $1 ; do
            if_contain "$i" "$2" || RES="$RES $i"
        done
        # Re-assert noglob in case a helper call changed it: the second
        # list is only split here and wildcards must stay literal.
        set -f
        for i in $2 ; do
            if_contain "$i" "$1" || RES="$RES $i"
        done
        [ -n "$noglob_was_on" ] || set +f
        strip_spaces "$RES"
}


# FIXME (kept from the original; the issue it refers to is not described)
# Args: PATTERN LIST
# Prints the words of LIST that contain a whole-word match (grep -w) for
# PATTERN.
# Example: reg_include "1." "11 12 21 22" -> "11 12"
reg_include()
{
        local i
        local RES=""
        # noglob: LIST is split unquoted; wildcards must stay literal.
        set -f
        for i in $2 ; do
                # "--" keeps a pattern that starts with "-" from being
                # parsed as a grep option.
                printf '%s\n' "$i" | grep -q -w -- "$1" && RES="$RES $i"
        done
        set +f
        strip_spaces "$RES"
}

# Args: WORD LIST
# Succeeds when reg_wordexclude removes at least one word from LIST for
# WORD, i.e. when LIST contains WORD. WORD is handed to reg_wordexclude
# as its pattern argument.
contains()
{
    #estrlist has "$1" "$2"
    local wordlist res
    # Normalise LIST first: reg_wordexclude returns a space-normalised
    # list, so comparing against the raw $2 would report a difference
    # whenever $2 merely has extra spaces.
    wordlist="$(strip_spaces "$2")"
    res="$(reg_wordexclude "$1" "$wordlist")"
    [ "$res" != "$wordlist" ]
}

# Args: COMMAND ARG1 [ARGS...]
# Prints the command line "$ <script> COMMAND "ARG1" "ARGS"" and then runs
# this script ($0) with those arguments, so that the help output shows the
# real result of the command.
example()
{
        local CMD="$1"
        local ARG1="$2"
        shift 2
        # "$*" joins the remaining arguments for display only
        echo "\$ $0 $CMD \"$ARG1\" \"$*\""
        # quoted so that a script path containing spaces still works
        "$0" "$CMD" "$ARG1" "$@"
}

# Runs example with the given arguments and then prints TRUE when it
# succeeded or FALSE when it failed.
example_res()
{
        # if/else instead of "a && b || c": with the latter, FALSE would
        # also be printed if printing TRUE itself failed.
        if example "$@" ; then
                echo TRUE
        else
                echo FALSE
        fi
}

# Prints the usage text: the list of commands followed by live examples,
# which are produced by running this script through example / example_res.
help()
{
        echo "estrlist developed for string list operations. See also cut, join, paste..."
        echo "Usage: $0 <command> [args]"
        echo "Commands:"
        echo "  strip_spaces [args]               - remove extra spaces"
# TODO: add filter
#        echo "  filter_strip_spaces               - remove extra spaces from words from standard input"
#        echo "  reg_remove  <PATTERN> [word list] - remove words containing a match to the given PATTERN (grep notation)"
#        echo "  reg_wordremove  <PATTERN> [word list] - remove words containing a match to the given PATTERN (grep -w notation)"
        echo "  exclude <list1> <list2>           - print list2 items exclude list1 items"
        echo "  reg_exclude <list PATTERN> [word list] - print only words that do not match PATTERN"
#        echo "  reg_wordexclude <list PATTERN> [word list] - print only words do not match PATTERN"
        echo "  has <PATTERN> string              - check the string for a match to the regular expression given in PATTERN (grep notation)"
        echo "  match <PATTERN> string            - check the string for a match to the regular expression given in PATTERN (egrep notation)"
        echo "  isempty [string] (is_empty)       - true if string has no any symbols (only zero or more spaces)"
        # has_space succeeds when a space IS present; the text used to say the opposite
        echo "  has_space [string]                - true if string contains a space"
        echo "  union [word list]                 - sort and remove duplicates"
        echo "  intersection <list1> <list2>      - print only intersected items (the same in both lists)"
        echo "  difference <list1> <list2>        - symmetric difference between lists items (not in both lists)"
        echo "  uniq [word list]                  - alias for union"
        echo "  list [word list]                  - just list words line by line"
        echo "  count [word list]                 - print word count"
        echo "  contains <word> [word list]       - check if word list contains the word"
        echo
        echo "Examples:"
#        example reg_remove "1." "11 12 21 22"
#        example reg_wordremove "1." "11 12 21 22"
        example exclude "1 3" "1 2 3 4"
        example reg_exclude "22 1." "11 12 21 22"
        example reg_wordexclude "wo.* er" "work were more else"
        example union "1 2 2 3 3"
        example_res contains "wo" "wo wor"
        example_res contains "word" "wo wor"
        example count "1 2 3 4 10"
        example_res isempty "  "
        #example_res isempty " 1 "
        example_res has ex "exactly"
        example_res has exo "exactly"
        example_res match "M[0-9]+" "M250"
        example_res match "M[0-9]+" "MI"
}

# ---- Command line handling ----
# Usage: estrlist <command> [args]
#        estrlist -- <command> [args]    args are passed on as they are
#        estrlist <command> - [args]     the first list is read from stdin
#        estrlist <command> <arg> -      the second list is read from stdin
COMMAND="$1"
if [ -z "$COMMAND" ] ; then
        echo "Run with --help for get command description." >&2
        exit 1
fi

if [ "$COMMAND" = "-h" ] || [ "$COMMAND" = "--help" ] ; then
        COMMAND="help"
fi

# These names are still defined as functions above, but they are no longer
# accepted as commands.
case "$COMMAND" in
    reg_remove|reg_wordremove)
        fatal "obsoleted command $COMMAND"
        ;;
esac

shift

# Abort unless $1 names a shell function defined in this script, so that the
# command word can never run an arbitrary external program.
require_command()
{
    declare -F -- "$1" >/dev/null || fatal "unknown command '$1'"
}

if [ "$COMMAND" = "--" ] ; then
    # ignore all options (-)
    COMMAND="$1"
    shift
    require_command "$COMMAND"
    "$COMMAND" "$@"
elif [ "$1" = "-" ] ; then
    require_command "$COMMAND"
    shift
    # The text from stdin is glued in front of the first remaining argument;
    # any further arguments are passed on separately.
    "$COMMAND" "$(cat) $@"
elif [ "$2" = "-" ] ; then
    require_command "$COMMAND"
    "$COMMAND" "$1" "$(cat)"
else
    require_command "$COMMAND"
    "$COMMAND" "$@"
fi