Browse Source

add a script for selecting a random line w/ a weight...

main
John-Mark Gurney 1 month ago
parent
commit
2aa9cb717d
1 changed files with 104 additions and 0 deletions
  1. +104
    -0
      randomline.fudge.sh

+ 104
- 0
randomline.fudge.sh View File

@@ -0,0 +1,104 @@
#!/bin/sh -

# Input: a file where each line is a weight followed by contents.  The
# weight is a number w/ a decimal point that gives the relative chance
# that the line will be returned.  That is, the probability of a line being
# selected and printed is weight / (sum of all weights).
#
# Sample input file:
# .9 a
# 1.2 b
# 1.1 c
#
# a will be output w/ prob .9 / 3.2, b w/ prob 1.2 / 3.2 and c w/ prob 1.1 / 3.2
#
# Testing:
# for i in $(jot 1000); do (echo .5 a; echo 1.0 b; echo 1.0 c ) | sh randomline.fudge.sh ; done | sort | uniq -c
#
# a P(1/5) or ~200, b and c each P(2/5) or ~400
#
# for i in $(jot 1000); do (echo .5 a; echo .5 b; echo .5 c; echo .5 d; echo .5 e; echo .5 f; echo .5 g; echo .5 h; echo 1.0 i ) | sh randomline.fudge.sh ; done | sort | uniq -c
#
# a through h each P(1/10) or ~100, i P(1/5) or ~200
#

# srand seed:
# "dd if=/dev/random 2>/dev/null | LC_ALL=C tr -d -c 1-9 | dd bs=1 count=19 2>/dev/null" | getline seed; srand(seed)
# + rand()

LC_ALL=C awk '
function urandom(n) {
# return random [0, n)

# maxn needs to be 2**(x*4), and cbs=x
maxn = 4294967296

if (n == 1)
return 0

if (n > maxn) {
printf("n too large!\n") > "/dev/stderr"
exit(5)
}

for (;;) {
"dd if=/dev/random | LC_ALL=C tr -c -d 0-9a-f | dd conv=unblock cbs=8" | getline a

r = ("0x" a) + 0
quot = int(maxn / n) * n
if (r >= quot)
continue

return r % n
}
}

function getfudge(i, j) {
dot = index(i, ".")
if (dot == 0) {
printf("invalid fudge, no dot\n") > "/dev/stderr"
exit(6)
}

# .33
precs = length(i) - dot
#print "a" precs " " length(i) " " dot
precsv = 1
for (j = 0; j < precs; j++) {
precsv = 10 * precsv
}
return i + 0.0
}

{
fudge = getfudge($1) * precsv

# to be slightly better, split off length of $1, then find $2 and return that
$0 = substr($0, index($0, $2))

if (NR == 1) {
save = $0
# total fraction so far
runfudge = fudge
# precision of total fraction
runprecsv = precsv
next
}

if (runprecsv < precsv) {
runfudge = runfudge * (precsv / runprecsv)
runprecsv = precsv
} else {
fudge = fudge * (runprecsv / precsv)
}

#print "f: " fudge ", precs: " precs " " precsv ", runfudge: " runfudge " " runprecsv " " (runfudge + fudge)
#"jot -r 1 0 " (NR - 1) | getline a
a = urandom((runfudge + fudge))
#a = int(rand() * (NR))
#print "a: " a ", fudge: " fudge
if (a < fudge)
save = $0

runfudge += fudge
}

END { print save }'

Loading…
Cancel
Save