#!/bin/sh -
#
# randomline.fudge.sh - print one randomly selected line of a weighted list.
#
# Input (stdin): one record per line, a weight followed by the contents.
# The weight is a non-negative decimal number that must contain a decimal
# point (".9", "1.2", "3.").  The probability of a line being selected and
# printed is weight / (sum of all weights).  Only the contents are printed,
# not the weight.
#
# Sample input file:
#   .9 a
#   1.2 b
#   1.1 c
#
# a will be output w/ prob .9 / 3.2, b w/ prob 1.2 / 3.2 and c w/ prob 1.1 / 3.2
#
# Special cases:
#   - empty input prints nothing
#   - if every weight is zero the first line is printed
#
# Exit status:
#   0  a line was selected (or the input was empty)
#   5  the scaled sum of weights exceeds 2**32
#   6  a weight is missing its decimal point or is not a decimal number
#   7  random data could not be read (od(1) on /dev/urandom failed)
# On a non-zero exit status nothing is written to stdout.
#
# Testing:
# for i in $(jot 1000); do (echo .5 a; echo 1.0 b; echo 1.0 c ) | sh randomline.fudge.sh ; done | sort | uniq -c
#
# a P(1/5) or ~200, b and c each P(2/5) or ~400
#
# for i in $(jot 1000); do (echo .5 a; echo .5 b; echo .5 c; echo .5 d; echo .5 e; echo .5 f; echo .5 g; echo .5 h; echo 1.0 i ) | sh randomline.fudge.sh ; done | sort | uniq -c
#
# Alternative that was considered instead of reading the random device for
# every draw (srand seed + rand()):
# "dd if=/dev/random 2>/dev/null | LC_ALL=C tr -d -c 1-9 | dd bs=1 count=19 2>/dev/null" | getline seed; srand(seed)

# Reads weighted lines from stdin and writes the contents of one of them to
# stdout.  Single pass, constant memory: the line read so far is replaced by
# the current line with probability weight / (running total + weight).
randomline_fudge() {
  LC_ALL=C awk '
  BEGIN {
    # Every random draw is an unsigned 32 bit value, so bounds are < 2**32.
    maxn = 4294967296
    # One long-running od process supplies all draws.  Decimal output is
    # used because awk string to number conversion of hex is not portable.
    # -v keeps od from collapsing repeated lines into "*".
    randcmd = "od -An -v -tu4 /dev/urandom 2>/dev/null"
  }

  # Report msg on stderr and stop with exit status code.  The status is
  # also stored in errcode because exit still runs the END rule, which
  # must then stay silent.
  function die(msg, code) {
    printf("%s\n", msg) > "/dev/stderr"
    errcode = code
    exit code
  }

  # Return a uniformly distributed integer in [0, n).
  # n is expected to be a non-negative integer; n <= 1 yields 0 without
  # consuming random data (this also covers an all-zero total weight).
  function urandom(n,    line, parts, r, limit) {
    if (n <= 1)
      return 0

    if (n > maxn)
      die("n too large!", 5)

    # Largest multiple of n that fits below maxn; draws at or above it are
    # rejected so that the modulo below introduces no bias.
    limit = int(maxn / n) * n

    for (;;) {
      if ((randcmd | getline line) <= 0)
        die("cannot read random data", 7)
      # od prints several values per line; only the first one is used.
      split(line, parts, " ")
      r = parts[1] + 0
      if (r < limit)
        return r % n
    }
  }

  # Parse the weight w ("1.25") and return it as an exact integer scaled by
  # a power of ten (125).  The scale factor (100) is left in the global
  # precsv.  Working on the digit string avoids floating point error such
  # as 1.1 * 10 not being exactly 11.
  function getfudge(w,    dot, precs) {
    dot = index(w, ".")
    if (dot == 0)
      die("invalid fudge, no dot", 6)
    if (w !~ /^([0-9]+[.][0-9]*|[.][0-9]+)$/)
      die("invalid fudge, not a decimal number", 6)

    precsv = 1
    for (precs = length(w) - dot; precs > 0; precs--)
      precsv *= 10

    # Digits before and after the dot, joined and read as one integer.
    return (substr(w, 1, dot - 1) substr(w, dot + 1)) + 0
  }

  {
    fudge = getfudge($1)

    # The contents are whatever follows the weight field.  The field is
    # stripped by position, so contents that also occur inside the weight
    # text (".5 5 apples") are handled correctly.
    content = $0
    sub(/^[ \t]*[^ \t]+[ \t]*/, "", content)

    if (NR == 1) {
      save = content
      # total weight so far, scaled by runprecsv
      runfudge = fudge
      # scale factor (precision) of the total weight
      runprecsv = precsv
      next
    }

    # Bring the running total and the current weight to a common scale.
    # Both factors are powers of ten, so the ratios are exact integers.
    if (runprecsv < precsv) {
      runfudge *= precsv / runprecsv
      runprecsv = precsv
    } else {
      fudge *= runprecsv / precsv
    }

    # Replace the saved line with probability fudge / (runfudge + fudge).
    if (urandom(runfudge + fudge) < fudge)
      save = content
    runfudge += fudge
  }

  END {
    if (errcode != 0)
      exit errcode
    if (NR > 0)
      print save
  }'
}

randomline_fudge