#!/bin/sh -
# Print one line chosen at random from standard input, honouring a
# per-line weight.
#
# Input: each line is a weight followed by the contents. The weight is a
# non-negative decimal number written with a decimal point (.5, 1.0, 12.25).
# The probability of a line being selected and printed is
# weight / (sum of all weights).
#
# Sample input file:
# .9 a
# 1.2 b
# 1.1 c
#
# a will be output w/ prob .9 / 3.2, b w/ prob 1.2 / 3.2 and c w/ prob 1.1 / 3.2
#
# Testing:
# for i in $(jot 1000); do (echo .5 a; echo 1.0 b; echo 1.0 c ) | sh randomline.fudge.sh ; done | sort | uniq -c
#
# a P(1/5) or ~200, b and c each P(2/5) or ~400
#
# for i in $(jot 1000); do (echo .5 a; echo .5 b; echo .5 c; echo .5 d; echo .5 e; echo .5 f; echo .5 g; echo .5 h; echo 1.0 i ) | sh randomline.fudge.sh ; done | sort | uniq -c
#
# Exit status:
# 0 success (nothing is printed for empty input)
# 5 the scaled sum of weights exceeds the range of the random source
# 6 a line has a malformed weight
# 7 the random source could not be read
#
# Alternative random source (not used below), seeding the awk generator:
# srand seed:
# "dd if=/dev/random 2>/dev/null | LC_ALL=C tr -d -c 1-9 | dd bs=1 count=19 2>/dev/null" | getline seed; srand(seed)
# + rand()
LC_ALL=C awk '
BEGIN {
    # maxn needs to be 2**(x*4), and cbs=x: each record produced by the
    # random pipeline is x hex digits, i.e. a uniform value in [0, maxn).
    maxn = 4294967296
    # /dev/urandom never blocks; dd diagnostics are noise here because a
    # failed read is detected through the getline result instead.
    randcmd = "dd if=/dev/urandom 2>/dev/null | LC_ALL=C tr -c -d 0-9a-f | dd conv=unblock cbs=8 2>/dev/null"
    status = 0
}

# Report msg on stderr and stop with exit status code. The status is
# remembered because END still runs after exit and must stay silent then.
function die(msg, code) {
    printf("%s\n", msg) > "/dev/stderr"
    status = code
    exit code
}

# Return the numeric value of the lowercase hex digit string s.
# Done by hand because not every awk converts "0x..." strings to numbers.
function hexval(s,    i, v) {
    v = 0
    for (i = 1; i <= length(s); i++)
        v = v * 16 + index("0123456789abcdef", substr(s, i, 1)) - 1
    return v
}

# Return a uniformly distributed integer in [0, n).
# n below 2 (including an all-zero weight sum) needs no randomness.
function urandom(n,    hex, r, limit) {
    if (n <= 1)
        return 0
    if (n > maxn)
        die("n too large!", 5)
    # Largest multiple of n not above maxn; draws at or beyond it are
    # rejected so that the modulo below is unbiased.
    limit = int(maxn / n) * n
    for (;;) {
        # The pipeline stays open between calls; each getline consumes
        # the next 8-digit record.
        if ((randcmd | getline hex) <= 0 || length(hex) != 8)
            die("cannot read random data", 7)
        r = hexval(hex)
        if (r < limit)
            return r % n
    }
}

# Parse the weight w. Returns the weight with its decimal point removed
# (an exact integer) and sets the global precsv to the matching power of
# ten, so that weight == result / precsv.
function getfudge(w,    dot, digits, k) {
    dot = index(w, ".")
    if (dot == 0)
        die("invalid fudge, no dot", 6)
    digits = substr(w, 1, dot - 1) substr(w, dot + 1)
    if (digits !~ /^[0-9]+$/)
        die("invalid fudge, not a decimal number", 6)
    precsv = 1
    for (k = length(w) - dot; k > 0; k--)
        precsv *= 10
    return digits + 0
}

{
    fudge = getfudge($1)
    # Contents are whatever follows the weight field. Strip the field by
    # position rather than searching for $2, which could match inside
    # the weight itself.
    line = $0
    sub(/^[ \t]*[^ \t]+[ \t]*/, "", line)
    if (NR == 1) {
        save = line
        # total weight so far, as an integer numerator
        runfudge = fudge
        # denominator (power of ten) of the total
        runprecsv = precsv
        next
    }
    # Bring the running total and the new weight to a common denominator.
    if (runprecsv < precsv) {
        runfudge = runfudge * (precsv / runprecsv)
        runprecsv = precsv
    } else {
        fudge = fudge * (runprecsv / precsv)
    }
    # Replace the kept line with probability fudge / (runfudge + fudge).
    if (urandom(runfudge + fudge) < fudge)
        save = line
    runfudge += fudge
}

END {
    if (status)
        exit status
    if (NR > 0)
        print save
}'