|
- #!/bin/sh -
-
- # Input: a file in which every line is a weight followed by the contents.
- # The weight is a number w/ a decimal point; it is the relative factor
- # with which the line will be returned. That is, the probability of a line
- # being selected and printed is weight / (sum of all weights).
- #
- # Sample input file:
- # .9 a
- # 1.2 b
- # 1.1 c
- #
- # a will be output w/ prob .9 / 3.2, b w/ prob 1.2 / 3.2 and c w/ prob 1.1 / 3.2
- #
- # Testing:
- # for i in $(jot 1000); do (echo .5 a; echo 1.0 b; echo 1.0 c ) | sh randomline.fudge.sh ; done | sort | uniq -c
- #
- # a P(1/5) or ~200, b and c each P(2/5) or ~400
- #
- # for i in $(jot 1000); do (echo .5 a; echo .5 b; echo .5 c; echo .5 d; echo .5 e; echo .5 f; echo .5 g; echo .5 h; echo 1.0 i ) | sh randomline.fudge.sh ; done | sort | uniq -c
- #
-
- # srand seed:
- # "dd if=/dev/random 2>/dev/null | LC_ALL=C tr -d -c 1-9 | dd bs=1 count=19 2>/dev/null" | getline seed; srand(seed)
- # + rand()
-
- # The whole selection runs as a single awk program that reads the weighted
- # lines from stdin (or from files named on the awk command line, if any).
- # LC_ALL=C pins the C locale for awk -- presumably so that weights are
- # parsed with "." as the decimal point in any user locale; TODO confirm.
- LC_ALL=C awk '
- # urandom(n): return a uniformly distributed random integer in [0, n).
- #
- # n is expected to be a positive integer no larger than 2^32. Randomness
- # comes from /dev/random: the dd | tr | dd pipeline emits an endless stream
- # of lines, each holding 8 lowercase hex digits (32 bits). The command
- # string is identical on every call, so awk keeps the pipe open and each
- # getline consumes the next line. Draws at or above the largest multiple of
- # n that fits in 2^32 are rejected, so that r % n carries no modulo bias.
- #
- # The names after n are locals (awk idiom); callers pass only n.
- # Exits with status 5 when n is out of range and with status 7 when the
- # random source cannot be read or yields malformed data.
- function urandom(n,    maxn, cmd, word, r, limit, k) {
-   # maxn needs to be 2**(x*4), and cbs=x
-   maxn = 4294967296
-
-   if (n == 1)
-     return 0
-
-   # a non-positive n would otherwise divide by zero below
-   if (n <= 0) {
-     printf("n must be positive!\n") > "/dev/stderr"
-     exit(5)
-   }
-
-   if (n > maxn) {
-     printf("n too large!\n") > "/dev/stderr"
-     exit(5)
-   }
-
-   cmd = "dd if=/dev/random | LC_ALL=C tr -c -d 0-9a-f | dd conv=unblock cbs=8"
-
-   # loop invariant: largest multiple of n that does not exceed maxn
-   limit = int(maxn / n) * n
-
-   for (;;) {
-     if ((cmd | getline word) <= 0 || length(word) != 8 || word !~ /^[0-9a-f]+$/) {
-       printf("cannot read random data\n") > "/dev/stderr"
-       exit(7)
-     }
-
-     # Decode the hex digits by hand: coercing a "0x..." string to a number
-     # is not portable across awk implementations (some yield 0).
-     r = 0
-     for (k = 1; k <= 8; k++)
-       r = r * 16 + index("0123456789abcdef", substr(word, k, 1)) - 1
-
-     if (r < limit)
-       return r % n
-   }
- }
-
- # getfudge(i): parse the weight string i, which must contain a ".".
- #
- # Returns the numeric value of i. As side effects it sets the globals
- #   dot    - 1-based position of the first "." in i
- #   precs  - number of characters after that "."
- #   precsv - 10 raised to the power precs
- # Prints a message on stderr and exits with status 6 when i has no ".".
- # j is a scratch local (awk idiom); callers pass only i.
- function getfudge(i, j) {
-   dot = index(i, ".")
-   if (!dot) {
-     printf("invalid fudge, no dot\n") > "/dev/stderr"
-     exit(6)
-   }
-
-   # e.g. ".33" has 2 characters after the dot, so precsv becomes 100
-   precs = length(i) - dot
-   precsv = 1
-   for (j = precs; j > 0; j--)
-     precsv *= 10
-
-   return i + 0
- }
-
- # Main rule, run once per input line of the form "<weight> <contents>".
- #
- # Keeps one candidate in save and a running total of the weights seen so
- # far in runfudge. Each new line replaces the candidate with probability
- # (its weight) / (running total including it), which gives every line an
- # overall probability of weight / (sum of all weights). Weights are scaled
- # by a power of ten (runprecsv) so that the arithmetic is done on integers.
- {
-   # getfudge() sets the global precsv as a side effect, so call it first
-   # and only then read precsv. Round the product because binary floating
-   # point can leave it a hair off an integer
-   # (e.g. 0.57 * 100 gives 56.99999999999999).
-   weight = getfudge($1)
-   fudge = int(weight * precsv + 0.5)
-
-   # The contents are everything after the first field. Strip the weight by
-   # position instead of searching for $2, which could match inside the
-   # weight itself (e.g. the line "1.0 1").
-   line = $0
-   sub(/^[ \t]*[^ \t]+[ \t]*/, "", line)
-
-   if (NR == 1) {
-     save = line
-     # total fraction so far
-     runfudge = fudge
-     # precision of total fraction
-     runprecsv = precsv
-     next
-   }
-
-   # A zero weight can never win, and skipping it avoids asking urandom()
-   # for a value from an empty range when every weight so far is zero.
-   if (fudge == 0)
-     next
-
-   # Bring the running total and the new weight to the same precision.
-   if (runprecsv < precsv) {
-     runfudge = runfudge * (precsv / runprecsv)
-     runprecsv = precsv
-   } else {
-     fudge = fudge * (runprecsv / precsv)
-   }
-
-   # pick is uniform in [0, runfudge + fudge); the new line wins when pick
-   # lands in the first fudge slots.
-   pick = urandom(runfudge + fudge)
-   if (pick < fudge)
-     save = line
-
-   runfudge += fudge
- }
-
- # After all input: emit the selected line (an empty line if nothing was read).
- END {
-   print save
- }'
|