| @@ -0,0 +1,104 @@ | |||
| #!/bin/sh - | |||
| # Input: a file in which each line is a weight followed by contents. The | |||
| # weight is a number with a decimal point; it is the weight factor that | |||
| # determines how likely the line is to be returned. That is, the probability | |||
| # of a line being selected and printed is weight / (sum of all weights). | |||
| # | |||
| # Sample input file: | |||
| # .9 a | |||
| # 1.2 b | |||
| # 1.1 c | |||
| # | |||
| # a will be output w/ prob .9 / 3.2, b w/ prob 1.2 / 3.2 and c w/ prob 1.1 / 3.2 | |||
| # | |||
| # Testing: | |||
| # for i in $(jot 1000); do (echo .5 a; echo 1.0 b; echo 1.0 c ) | sh randomline.fudge.sh ; done | sort | uniq -c | |||
| # | |||
| # a P(1/5) or ~200, b and c each P(2/5) or ~400 | |||
| # | |||
| # for i in $(jot 1000); do (echo .5 a; echo .5 b; echo .5 c; echo .5 d; echo .5 e; echo .5 f; echo .5 g; echo .5 h; echo 1.0 i ) | sh randomline.fudge.sh ; done | sort | uniq -c | |||
| # | |||
| # srand seed: | |||
| # "dd if=/dev/random 2>/dev/null | LC_ALL=C tr -d -c 1-9 | dd bs=1 count=19 2>/dev/null" | getline seed; srand(seed) | |||
| # + rand() | |||
| LC_ALL=C awk ' | |||
| function urandom(n,    maxn, cmd, hex, r, k, limit) { | |||
|     # Return a uniformly distributed random integer in [0, n). | |||
|     # | |||
|     # n     - upper bound (exclusive); must not exceed maxn. | |||
|     # Names after n are function-local scratch variables (awk convention). | |||
|     # | |||
|     # Exits with status 5 if n is too large, and with status 7 if no | |||
|     # random data can be read from the dd pipeline. | |||
|     # | |||
|     # maxn needs to be 2**(x*4), and cbs=x: each record read below is | |||
|     # x = 8 hex digits, i.e. a value in [0, 16**8). | |||
|     maxn = 4294967296 | |||
|     if (n == 1) | |||
|         return 0 | |||
|     if (n > maxn) { | |||
|         printf("n too large!\n") > "/dev/stderr" | |||
|         exit(5) | |||
|     } | |||
|     # The command is deliberately left open between calls: every getline | |||
|     # on the same command string yields the next 8-digit record. | |||
|     cmd = "dd if=/dev/random | LC_ALL=C tr -c -d 0-9a-f | dd conv=unblock cbs=8" | |||
|     # Largest multiple of n that fits in maxn; draws at or above it are | |||
|     # rejected so that r % n is not biased toward small values. | |||
|     limit = int(maxn / n) * n | |||
|     for (;;) { | |||
|         if ((cmd | getline hex) <= 0) { | |||
|             printf("cannot read random data\n") > "/dev/stderr" | |||
|             exit(7) | |||
|         } | |||
|         # Convert the hex digits by hand. Adding 0 to a "0x..." string is | |||
|         # not portable: some awk implementations evaluate it to 0. | |||
|         r = 0 | |||
|         for (k = 1; k <= length(hex); k++) | |||
|             r = r * 16 + index("0123456789abcdef", substr(hex, k, 1)) - 1 | |||
|         if (r < limit) | |||
|             return r % n | |||
|     } | |||
| } | |||
| function getfudge(i, j) { | |||
|     # Parse the weight string i and return it as a number. | |||
|     # | |||
|     # i - weight text; it must contain a decimal point. | |||
|     # j - local loop counter (not passed by callers). | |||
|     # | |||
|     # Side effect: sets the global precsv to 10 raised to the number of | |||
|     # characters after the dot (e.g. .33 has two, so precsv becomes 100). | |||
|     # The caller multiplies by precsv to scale the weight. | |||
|     # Exits with status 6 when i has no dot. | |||
|     dot = index(i, ".") | |||
|     if (!dot) { | |||
|         printf("invalid fudge, no dot\n") > "/dev/stderr" | |||
|         exit(6) | |||
|     } | |||
|     # Count of characters that follow the decimal point. | |||
|     precs = length(i) - dot | |||
|     precsv = 1 | |||
|     j = precs | |||
|     while (j-- > 0) | |||
|         precsv *= 10 | |||
|     return i + 0.0 | |||
| } | |||
| { | |||
|     # Per-line step of a one-pass weighted selection: the current line | |||
|     # replaces the saved one with probability | |||
|     #   (its weight) / (sum of weights seen so far, including this one). | |||
|     # | |||
|     # Call getfudge() in its own statement: it sets the global precsv as a | |||
|     # side effect, and the scaling below must see the updated value. | |||
|     fudge = getfudge($1) | |||
|     # Scale the weight to a whole number. Round, because a product such as | |||
|     # 1.1 * 10 may not come out as an exact integer in floating point. | |||
|     fudge = int(fudge * precsv + 0.5) | |||
|     # The contents are everything after the first field. Strip that field | |||
|     # by position rather than searching for $2, which could also match | |||
|     # inside the weight itself (e.g. the input line "1.1 1"). | |||
|     line = $0 | |||
|     sub(/^[ \t]*[^ \t]+[ \t]*/, "", line) | |||
|     if (NR == 1) { | |||
|         save = line | |||
|         # total scaled weight so far | |||
|         runfudge = fudge | |||
|         # scale factor (precision) of that total | |||
|         runprecsv = precsv | |||
|         next | |||
|     } | |||
|     # Bring the running total and the new weight to the same scale. | |||
|     if (runprecsv < precsv) { | |||
|         runfudge = runfudge * (precsv / runprecsv) | |||
|         runprecsv = precsv | |||
|     } else { | |||
|         fudge = fudge * (runprecsv / precsv) | |||
|     } | |||
|     # A zero-weight line can never be chosen; skipping the draw also | |||
|     # avoids asking urandom() for a value in an empty range. | |||
|     if (fudge > 0) { | |||
|         pick = urandom(runfudge + fudge) | |||
|         if (pick < fudge) | |||
|             save = line | |||
|     } | |||
|     runfudge += fudge | |||
| } | |||
| # After the last input record, output the line that was kept. | |||
| END { | |||
|     print save | |||
| }' | |||