|
|
@@ -0,0 +1,104 @@ |
|
|
|
#!/bin/sh - |
|
|
|
|
|
|
|
# Input: a file in which each line is a weight followed by contents. The
# weight is a number w/ a decimal point giving the relative likelihood
# that the line will be returned. That is, the probability of a line being
# selected and printed is weight / (sum of all weights).
|
|
|
# |
|
|
|
# Sample input file: |
|
|
|
# .9 a |
|
|
|
# 1.2 b |
|
|
|
# 1.1 c |
|
|
|
# |
|
|
|
# a will be output w/ prob .9 / 3.2, b w/ prob 1.2 / 3.2 and c w/ prob 1.1 / 3.2 |
|
|
|
# |
|
|
|
# Testing: |
|
|
|
# for i in $(jot 1000); do (echo .5 a; echo 1.0 b; echo 1.0 c ) | sh randomline.fudge.sh ; done | sort | uniq -c |
|
|
|
# |
|
|
|
# a P(1/5) or ~200, b and c each P(2/5) or ~400 |
|
|
|
# |
|
|
|
# for i in $(jot 1000); do (echo .5 a; echo .5 b; echo .5 c; echo .5 d; echo .5 e; echo .5 f; echo .5 g; echo .5 h; echo 1.0 i ) | sh randomline.fudge.sh ; done | sort | uniq -c |
|
|
|
# |
|
|
|
|
|
|
|
# srand seed: |
|
|
|
# "dd if=/dev/random 2>/dev/null | LC_ALL=C tr -d -c 1-9 | dd bs=1 count=19 2>/dev/null" | getline seed; srand(seed) |
|
|
|
# + rand() |
|
|
|
|
|
|
|
LC_ALL=C awk ' |
|
|
|
# urandom(n): return a uniformly distributed random integer in [0, n).
#
# Random data is read as 8 hex digit records from a dd/tr pipeline fed by
# /dev/random. The pipeline is started on first use and stays open, so each
# getline consumes the next record. Values at or above the largest multiple
# of n that fits below maxn are rejected to avoid modulo bias.
#
# n must satisfy 0 < n <= 4294967296.
# Exits with status 5 when n is out of range and with status 7 when no
# random data can be read.
# The names after n in the parameter list are locals, not arguments.
function urandom(n,    maxn, cmd, hex, limit, r, k) {
	# maxn needs to be 2**(x*4), and cbs=x
	maxn = 4294967296

	if (n == 1)
		return 0

	if (n > maxn) {
		printf("n too large!\n") > "/dev/stderr"
		exit(5)
	}

	# n == 0 would divide by zero below and a negative n has no meaning.
	if (n <= 0) {
		printf("n must be positive!\n") > "/dev/stderr"
		exit(5)
	}

	cmd = "dd if=/dev/random | LC_ALL=C tr -c -d 0-9a-f | dd conv=unblock cbs=8"

	# Largest multiple of n not above maxn; loop invariant, so computed once.
	limit = int(maxn / n) * n

	for (;;) {
		if ((cmd | getline hex) <= 0) {
			printf("cannot read random data\n") > "/dev/stderr"
			exit(7)
		}

		# maxn assumes exactly 8 digits; a short record would skew results.
		if (length(hex) != 8)
			continue

		# Decode the hex digits by hand. Coercing ("0x" hex) + 0 is not
		# portable: gawk yields 0 for it unless run in a special mode.
		r = 0
		for (k = 1; k <= 8; k++)
			r = r * 16 + index("0123456789abcdef", substr(hex, k, 1)) - 1

		if (r < limit)
			return r % n
	}
}
|
|
|
|
|
|
|
# getfudge(i): validate the weight string i and return its numeric value.
#
# Side effect: sets the global precsv to 10 ** (number of characters after
# the decimal point), e.g. ".33" gives 100. The caller multiplies the
# returned value by precsv to obtain an integer weight.
#
# i must be a non-negative decimal containing a dot (".5", "1.25", "3.").
# Exits with status 6 when the dot is missing or i is not such a number.
# j, dot and precs are locals, not arguments.
function getfudge(i, j,    dot, precs) {
	dot = index(i, ".")
	if (dot == 0) {
		printf("invalid fudge, no dot\n") > "/dev/stderr"
		exit(6)
	}

	# Signs other than +, letters, a second dot or a bare "." would produce
	# zero, negative or nonsensical weights further on, so reject them here.
	if (i !~ /^[+]?[0-9]*[.][0-9]*$/ || i !~ /[0-9]/) {
		printf("invalid fudge, not a non-negative decimal\n") > "/dev/stderr"
		exit(6)
	}

	# .33 has two digits after the dot, so precsv becomes 100
	precs = length(i) - dot
	precsv = 1
	for (j = 0; j < precs; j++)
		precsv = 10 * precsv

	return i + 0.0
}
|
|
|
|
|
|
|
# Per-record rule: weighted reservoir selection of a single line.
#
# Each line is "<weight> <contents>". Weights are turned into integers by
# scaling with a power of ten (precsv, set by getfudge). runfudge holds the
# sum of all earlier weights at precision runprecsv. The current line
# replaces the saved one with probability fudge / (runfudge + fudge), which
# leaves every line selected with probability weight / (sum of weights).
{
	# Set while a record is being processed. getfudge and urandom abort via
	# exit, which skips the rest of this action; END then sees the flag
	# still set and prints nothing instead of a misleading partial pick.
	busy = 1

	# Round to the nearest integer: products such as 0.29 * 100 are not
	# exact in floating point and would give urandom a fractional modulus.
	fudge = int(getfudge($1) * precsv + 0.5)

	# Strip the weight field and the blanks around it. Searching for the
	# text of $2 instead would match inside the weight itself for input
	# such as "1.1 1 apple". A line holding only a weight has empty contents.
	sub(/^[ \t]*[^ \t]+[ \t]*/, "")

	if (NR == 1) {
		save = $0
		# total weight so far
		runfudge = fudge
		# precision (power of ten) of the total weight
		runprecsv = precsv
		busy = 0
		next
	}

	# Bring the running total and the new weight to the same precision.
	if (runprecsv < precsv) {
		runfudge = runfudge * (precsv / runprecsv)
		runprecsv = precsv
	} else {
		fudge = fudge * (runprecsv / precsv)
	}

	total = runfudge + fudge

	# With every weight so far equal to zero there is nothing to draw from;
	# keep the current pick rather than asking for a number in [0, 0).
	if (total > 0 && urandom(total) < fudge)
		save = $0

	runfudge = total
	busy = 0
}

# Print the selected line. Nothing is printed for empty input or after an
# abort in the middle of a record; the exit status set by the abort is kept.
END {
	if (NR > 0 && !busy)
		print save
}'