Browse Source

first cut of the snapaid utility

This is a set of scripts to make an index of all the snapshots, and
the message-id of the email that contains the hashes...

There is also the utility itself which uses this index to both find
out what snapshots are available for d/l, and to verify any snapshot
that may have already been d/l'd.
main
John-Mark Gurney 6 years ago
commit
db2bdca4ce
6 changed files with 557 additions and 0 deletions
  1. +2
    -0
      .gitignore
  2. +20
    -0
      NOTES.txt
  3. +85
    -0
      addinfo.sh
  4. +90
    -0
      mksnapidx.awk
  5. +346
    -0
      snapaid.sh
  6. +14
    -0
      splitbody.py

+ 2
- 0
.gitignore View File

@@ -0,0 +1,2 @@
arch
snapshot.*

+ 20
- 0
NOTES.txt View File

@@ -0,0 +1,20 @@
Fetch all of the archive:
scp -r freefall:/local/mail/archive/*/freebsd-snapshots .

XXX - don't use the following, doesn't decode body properly
Explode the mbox to files:
cat 201810* freebsd-snapshots | split -p '^From ' - snap.
The stdin trick is needed so the numbering/lettering continues properly.

Correct way to split the bodies off.
python splitbody.py arch/complete.txt arch/snap.

Expand all the "validly" formed messages to a file:
zcat $(zgrep -l 'PGP SIGNED MESSAGE' *) > ../complete.txt

We only lose 2012, so no big deal.

Bulk import:
for i in snap.??; do sh addinfo.sh -m < $i; done

-m means more messages will follow, so the check that each snapshot still
exists (for download) is skipped until the final run without -m.

+ 85
- 0
addinfo.sh View File

@@ -0,0 +1,85 @@
#!/bin/sh -
#
# Usage: addinfo.sh [ -m ] < message
#
# Options:
#   -m  more messages are still to come; sets more=1, which the rest of
#       the script tests with [ x"$more" = x"1" ].

set -e

# Print the usage line on stderr and exit with status 2.
usage() {
	echo "Usage: $0 [ -m ]" >&2
	exit 2
}

# Start from a known value so a "more" inherited from the environment
# cannot switch the option on.
more=
while getopts m opt; do
	case "$opt" in
	m)
		more=1
		;;
	*)
		# getopts has already reported the offending option
		usage
		;;
	esac
done
shift $((OPTIND - 1))

# Marker directory for a run in progress.  mkdir fails when it already
# exists, and with set -e in effect that aborts the script right here.
mkdir "$0.running"

# Boil stdin down to the Message-ID header line(s) seen outside the
# signed part, followed by everything from the BEGIN PGP SIGNED MESSAGE
# line through the END PGP SIGNATURE line.
tmpfname="tmp.snapinf.asc"
awk '
!(output == 1) && tolower($1) == "message-id:" { print; next }
$0 == "-----BEGIN PGP SIGNED MESSAGE-----" { output = 1 }
output == 1 { print }
$0 == "-----END PGP SIGNATURE-----" { output = 0 }
' > "$tmpfname"

# Stop here, cleaning up after ourselves, when the signature is bad.
gpg --verify "$tmpfname" || {
	echo 'failed verify'
	rm "$tmpfname"
	rmdir "$0.running"
	exit 1
}

# Convert the verified message into index lines.
awk -f ./mksnapidx.awk "$tmpfname" > additional
rm "$tmpfname"

# Build the new indexes.
case "$more" in
1)
	# More messages will follow: append to the uncompressed indexes
	# without checking anything.  A missing index is not an error.
	(cat snapshot.idx || :; cat additional) > snapshot.idx.new
	(cat snapshot.complete.idx || :; cat additional) > snapshot.complete.idx.new
	;;
*)
	# Last (or only) message: the current index keeps just the entries
	# whose URL (field 9) wget can still reach.
	(xzcat snapshot.idx.xz; cat additional) | sort -u | awk '
	system("wget --method=HEAD " $9) == 0 { print }
	' > snapshot.idx.new

	xz snapshot.idx.new

	# The complete index keeps every entry ever seen.
	(xzcat snapshot.complete.idx.xz || :; cat additional) | sort -u > snapshot.complete.idx.new
	xz snapshot.complete.idx.new
	;;
esac

rm additional

# Move the new indexes into place.
case "$more" in
1)
	mv snapshot.idx.new snapshot.idx
	mv snapshot.complete.idx.new snapshot.complete.idx
	;;
*)
	mv snapshot.idx.new.xz snapshot.idx.xz
	mv snapshot.complete.idx.new.xz snapshot.complete.idx.xz
	;;
esac

rmdir "$0.running"

+ 90
- 0
mksnapidx.awk View File

@@ -0,0 +1,90 @@
# Base URLs the download links are built from, one per image type.
BEGIN {
	isoroot = "https://download.freebsd.org/ftp/snapshots/ISO-IMAGES/"
	vmroot = "https://download.freebsd.org/ftp/snapshots/VM-IMAGES/"
}

# Remember the message id of the mail being read: take the second field
# of the Message-ID header, drop everything up to the last "<" and
# everything from the first ">" on.
tolower($1) == "message-id:" {
	MID = $2
	sub(/^.*</, "", MID)
	sub(/>.*$/, "", MID)
}

# Section headings inside the signed message decide which image type and
# which base URL apply to the checksum lines that follow.
$0 == "== ISO CHECKSUMS ==" { type = "iso"; root = isoroot }

$0 == "== VM IMAGE CHECKSUMS ==" { type = "vm"; root = vmroot }

# isdate(date): return 1 when date consists of exactly eight decimal
# digits (the YYYYMMDD stamp used in snapshot file names), 0 otherwise.
# The whole string has to match, so a name that merely contains an
# eight digit run is not mistaken for a date.
function isdate(date) {
	if (length(date) == 8 && date ~ /^[0-9]+$/)
		return 1

	return 0
}

# File names this rule has to take apart, for reference:
#FreeBSD-13.0-CURRENT-powerpc-powerpcspe-20181026-r339752-bootonly.iso
#FreeBSD-13.0-CURRENT-sparc64-20181026-r339752-bootonly.iso.asc
#FreeBSD-13.0-CURRENT-arm64-aarch64-PINE64-LTS-20181026-r339752.img.xz
#FreeBSD-13.0-CURRENT-i386-20181026-r339752.vmdk.xz

# Emit one index line for every "SHA512 (file) = hash" checksum line:
#   type release arch platform date svnrev vers fname url message-id
$1 == "SHA512" {
	# $2 is "(file)": drop its first and last character
	fname = substr($2, 2, length($2) - 2)

	# Keep only the first two dot-separated pieces (the first dot is the
	# one inside the version number); that removes ALL extensions,
	# including the vm type.
	split(fname, dotparts, ".")
	basename = dotparts[1] "." dotparts[2]
	cnt = split(basename, parts, "-")
	release = parts[2] "-" parts[3]

	# The architecture is parts[4], plus parts[5] for the two-word ones.
	twoword = (parts[4] == "arm" || parts[4] == "arm64" || (parts[4] == "powerpc" && parts[5] == "powerpcspe"))
	if (twoword) {
		arch = parts[4] "-" parts[5]
		basearch = parts[5]
		nextidx = 6
	} else {
		arch = parts[4]
		basearch = parts[4]
		nextidx = 5
	}

	# Next comes the date, optionally preceded by a one- or two-word
	# platform name.  Afterwards nextidx is the piece after the date.
	if (isdate(parts[nextidx])) {
		platform = "xxx"
		date = parts[nextidx]
		nextidx += 1
	} else if (isdate(parts[nextidx + 1])) {
		platform = parts[nextidx]
		date = parts[nextidx + 1]
		nextidx += 2
	} else {
		platform = parts[nextidx] "-" parts[nextidx + 1]
		date = parts[nextidx + 2]
		nextidx += 3
	}
	svnrev = parts[nextidx]

	# Anything after parts[nextidx] is joined back together with "-";
	# "xxx" stands in when parts[nextidx] is the last piece.
	if (nextidx == cnt)
		vers = "xxx"
	else {
		vers = ""
		sep = ""
		i = nextidx + 1
		while (i <= cnt) {
			vers = vers sep parts[i]
			sep = "-"
			i++
		}
	}

	# VM images use the third dot-separated piece of the file name in
	# that column instead, and live in a deeper directory tree.
	if (type == "vm") {
		vers = dotparts[3]
		url = root release "/" basearch "/" date "/" fname
	} else
		url = root parts[2] "/" fname

	printf("%s %s %s %s %s %s %s %s %s %s\n", type, release, arch, platform, date, svnrev, vers, fname, url, MID)
}

+ 346
- 0
snapaid.sh View File

@@ -0,0 +1,346 @@
#!/bin/sh -

STOREDIR="$HOME/.snapaid"

# setdefaults: (re)locate the external tools this script drives.
#
# Sets GPG, WGET and SHASUM to the path of gpg2, wget and shasum, or to
# the empty string when the tool is not on PATH.  "command -v" is used
# because which(1) is not a standard utility and some versions report
# "not found" on stdout.
setdefaults() {
	GPG=$(command -v gpg2) || GPG=
	WGET=$(command -v wget) || WGET=
	SHASUM=$(command -v shasum) || SHASUM=
}
setdefaults

# Refuse to run unless each external tool resolved to an executable.
[ -x "$GPG" ] || {
	echo 'Failed to find gpg2 executable'
	exit 1
}

[ -x "$WGET" ] || {
	echo 'Failed to find wget executable'
	exit 1
}

[ -x "$SHASUM" ] || {
	echo 'Failed to find shasum executable'
	exit 1
}

# Notes on wget options that may be of use:
#   -N           for timestamps
#   --backups=x  for backing up

# Where the xz-compressed indexes are published: the complete index is
# fetched by the verify verb, the current one by the find verb.
completeurl="https://www.funkthat.com/~jmg/FreeBSD-snap/snapshot.complete.idx.xz"
currenturl="https://www.funkthat.com/~jmg/FreeBSD-snap/snapshot.idx.xz"

# Layout of one index line (space separated fields), followed by the
# field numbers and an example line:
# type release arch platform date svnrev xxx fname url mid
# 1 2 3 4 5 6 7 8 9 10
# iso 11.1-STABLE arm-armv6 BEAGLEBONE 20180315 r330998 xxx FreeBSD-11.1-STABLE-arm-armv6-BEAGLEBONE-20180315-r330998.img.xz https://download.freebsd.org/ftp/snapshots/ISO-IMAGES/11.1/FreeBSD-11.1-STABLE-arm-armv6-BEAGLEBONE-20180315-r330998.img.xz 20180316000842.GA7399@FreeBSD.org

# From here on an unchecked command failure aborts the script.
set -e

# copy_function SRC DST
#
# Define DST as a copy of the existing shell function SRC; returns 1
# when SRC is not a function.  Used by the self tests to swap functions
# in and out.
# NOTE(review): relies on "declare", which is a bash builtin, although
# the script's interpreter line names /bin/sh.
copy_function() {
	if ! declare -F "$1" > /dev/null; then
		return 1
	fi

	local body
	body="$(declare -f "$1")"

	# keep everything after the first "(" of the definition and put the
	# new name in front of it
	eval "${2}(${body#*\(}"
}

# Test stand-in for an external command: it does nothing but exit with
# status 1, which makes the consumer see bad (empty) input.
cmd_failure() { exit 1; }

# Test stand-in for verifygpg: the first call reports failure, but not
# before putting the saved verifygpg_orig back in place as verifygpg, so
# every later call runs the real check.
gpg_first_fails() {
	copy_function verifygpg_orig verifygpg; return 1
}

# Create the storage directory.  Best effort: a failure (typically
# because it already exists) is silently ignored.
mkstore() {
	if ! mkdir "$STOREDIR" 2>/dev/null; then
		:
	fi
}

# get_raw MID
#
# Make sure "$STOREDIR/MID.raw" exists and passes verifygpg.
#
# A stored copy that still verifies is used as is; one that fails is
# removed and fetched again.  Fetching asks the mid.cgi lookup where the
# message lives, downloads its raw form, keeps only the Message-ID
# header line(s) outside the signed part plus the signed part itself,
# and installs the result only if verifygpg accepts it.
#
# Returns 0 on success, 1 (after printing a message) when the fetched
# data does not verify.
get_raw() {
	local mid midfile loc tmpfile

	mkstore

	mid="$1"
	midfile="$STOREDIR/$mid".raw

	if [ -e "$midfile" ]; then
		if ! verifygpg "$midfile"; then
			# stale or damaged copy: drop it and start over
			rm "$midfile"
			get_raw "$mid"
			return $?
		fi
		return 0
	fi

	# get the location, it's a database lookup answered with a redirect
	loc=$($WGET --max-redirect=0 --method=HEAD -S -o - -O - 'https://docs.freebsd.org/cgi/mid.cgi?'"$mid" 2>/dev/null | awk 'tolower($1) == "location:" { print $2; exit }')

	# a scheme-relative location (//host/...) needs https: in front
	if [ x"$loc" != x"${loc#//}" ]; then
		loc="https:$loc"
	fi

	# work on a temporary name so a partial file is never mistaken for
	# a finished one
	tmpfile="$STOREDIR/.tmp.$$.$mid".raw

	# Strip out everything but the message id and the first signed part.
	# Header matching is switched off inside the signed part so that no
	# line of it is ever written twice.
	$WGET -O - "$loc"+raw 2>/dev/null | awk '
	!sigbody && tolower($1) == "message-id:" {
		print
		next
	}

	$0 == "-----BEGIN PGP SIGNED MESSAGE-----" {
		sigbody = 1
	}

	sigbody {
		print
	}

	$0 == "-----END PGP SIGNATURE-----" {
		sigbody = 0
	}' > "$tmpfile"

	if verifygpg "$tmpfile"; then
		mv "$tmpfile" "$midfile"
	else
		rm "$tmpfile"
		echo Bad signature from mail archive.
		return 1
	fi
}

# fetch URL
#
# Run "$WGET -N URL" from inside the storage directory, discarding all
# of its output.  The exit status is that of the cd or of the download.
fetch() {
	mkstore

	(
		cd "$STOREDIR" || exit
		$WGET -N "$1" >/dev/null 2>&1
	)
}

# getvermid FNAME
#
# Print the message id (field 10) of every line in the complete index
# whose file name (field 8) is FNAME.  Prints nothing when FNAME is not
# in the index.
getvermid() {
	# look up the argument itself rather than whatever $i happens to
	# hold in the caller
	xzcat "$STOREDIR"/snapshot.complete.idx.xz |
		awk -v fname="$1" '$8 == fname { print $10 }'
}

# verifygpg FILE
#
# Only the basename of FILE is used; that file must exist in STOREDIR.
# Runs "$GPG --verify" on it from inside STOREDIR with gpg's stderr
# discarded.  Returns 0 when the signature is valid, otherwise prints an
# error and returns 1.
verifygpg() {
	local fname
	fname=$(basename "$1")

	(cd "$STOREDIR" && $GPG --verify "$fname" 2> /dev/null) && return 0

	echo 'ERROR: PGP signature verification failed!'
	return 1
}

# verifyfile MID FILE
#
# Check FILE against the hash recorded for it in "$STOREDIR/MID.raw".
#
# Only checksum lines inside the signed part of the message (between
# the BEGIN PGP SIGNED MESSAGE and BEGIN PGP SIGNATURE lines) are
# considered; SHA512 is preferred over SHA256.  The chosen hash is
# handed to "$SHASUM -c", whose exit status is returned.  When no usable
# hash is found a message is printed and 1 is returned, so callers can
# react, e.g. by deleting the download.
verifyfile() {
	local fname
	local hashinfo
	local algo hash

	fname="$STOREDIR/${1}.raw"

	# The file name is handed to awk as a variable rather than spliced
	# into the program text.  awk exits 1 when it finds nothing; the
	# "|| :" keeps set -e from killing the script before the message
	# below can be printed.
	hashinfo=$(awk -v target="($2)" '
	check && $2 == target {
		hashes[$1] = $4
	}

	$0 == "-----BEGIN PGP SIGNED MESSAGE-----" {
		check = 1
	}

	$0 == "-----BEGIN PGP SIGNATURE-----" {
		check = 0
	}

	END {
		if ("SHA512" in hashes)
			algo = "SHA512"
		else if ("SHA256" in hashes)
			algo = "SHA256"
		else {
			print "unkn BADHASH"
			exit 1
		}

		print algo " " hashes[algo]
	}
	' "$fname") || :

	# hashinfo is "ALGO HASH" (or empty if awk could not run at all)
	algo=${hashinfo%% *}
	hash=${hashinfo#* }

	if [ x"$algo" = x"unkn" ] || [ x"$algo" = x"" ]; then
		echo 'Unable to find hash for file.'
		return 1
	fi

	# SHA512 -> "-a 512", SHA256 -> "-a 256"
	printf '%s %s\n' "$hash" "$2" | $SHASUM -a "${algo#SHA}" -c -
}

# Command dispatch: $1 selects the verb.
#   verify file ...  look each file up in the complete index, fetch the
#                    signed message that lists it and check its hash
#   find             interactively pick a snapshot from the current
#                    index, download it and check its hash
#   test             self tests for get_raw
# Anything else prints the usage text.
if [ x"$1" = x"verify" ]; then
	shift

	fetch "$completeurl"

	for i in "$@"; do
		vermid=$(getvermid "$i")
		if [ x"$vermid" = x"" ]; then
			echo "Unable to find entry for: $i"
			continue
		fi

		get_raw "$vermid"
		if ! verifygpg "$vermid".raw; then
			echo "Unable to verify: $i"
		fi

		# the hashes are in the message just fetched, which is stored
		# under its message id
		verifyfile "$vermid" "$i"
	done
elif [ x"$1" = x"find" ]; then
	fetch "$currenturl"

	tmpdir=$(mktemp -d -t snapaid)

	# single quotes: $tmpdir is expanded, quoted, when the trap runs
	trap 'rm -rf "$tmpdir"' 0

	# The selection loop runs in a subshell inside the temporary
	# directory; its result is handed back through the file "sel".
	( cd "$tmpdir";
	xzcat "$STOREDIR"/snapshot.idx.xz | sort -r -k 5 > selection;
	while :; do
		# display current list
		cnt=$(wc -l < selection)
		awk '
		BEGIN {
			fmtstr = "%2s %-3s %-15s %-18s %-18s %-8s %-7s\n"
			printf(fmtstr, "#", "TYP", "RELEASE", "ARCH", "PLATFORM/TYPE", "DATE", "SVNREV")
			cnt = 1
		}

		{
			if ($4 == "xxx")
				plt=$7
			else
				plt=$4
			printf(fmtstr, cnt, $1, $2, $3, plt, $5, $6)
			if (cnt >= 20)
				exit 0

			cnt += 1
		}
		' selection

		read -p 'Select image, enter search term, reset, or quit: ' sel
		if [ x"$sel" = x"reset" ]; then
			xzcat "$STOREDIR"/snapshot.idx.xz | sort -r -k 5 > selection;
			continue
		elif [ x"$sel" = x"quit" ]; then
			echo "$sel" > sel
			break
		fi

		# at most 20 lines are shown, so at most 20 can be picked
		if [ "$cnt" -gt 20 ]; then
			cnt=20
		fi

		# a non-numeric answer makes the test fail (its complaint is
		# discarded) and is treated as a search term
		if [ "$sel" -ge 1 -a "$sel" -le "$cnt" ] 2>/dev/null; then
			echo selected image $sel
			echo $(tail -n +"$sel" selection | head -n 1) > sel
			break
		else
			# restrict

			if ! grep -- "$sel" selection > selection.new; then
				echo WARNING: Ignoring selection, no results.
			else
				mv selection.new selection
			fi
		fi
	done
	)

	sel=$(cat "$tmpdir"/sel)
	if [ x"$sel" = x"quit" ]; then
		exit 0
	fi

	echo $sel
	fname=$(cut -f 8 -d ' ' "$tmpdir"/sel)
	dlurl=$(cut -f 9 -d ' ' "$tmpdir"/sel)
	# field 10 is the message id of the signed mail, not a URL
	verurl=$(cut -f 10 -d ' ' "$tmpdir"/sel)

	# fetch link
	$WGET -- "$dlurl"

	# verify image: get the signed message by message id, the same way
	# the verify verb does
	get_raw "$verurl"
	if ! verifygpg "$verurl".raw; then
		echo "Unable to verify: $fname"
	fi

	# do not leave an image around that failed its hash check
	if ! verifyfile "$verurl" "$fname"; then
		rm "$fname"
	fi
elif [ x"$1" = x"test" ]; then
	# Run various tests

	# Test getting the raw file
	echo 'Testing get_raw success...'
	mid='20160122055622.GA87581@FreeBSD.org'
	get_raw "$mid"

	# Verify results
	(cd "$STOREDIR" && echo '6e53df5995b6cc423c7f2d63b6df52d5d7f70e8586c25f91433fd8a1a2466e77be6a38884bde8bedd9ff6e7deb0215a66e1c2a16e4955503c20445e649a5fb47 20160122055622.GA87581@FreeBSD.org.raw' | $SHASUM -a 512 -c)
	echo passed

	# If the file already exists, but fails verification, that
	# it will refetch and be correct
	echo 'Testing get_raw with file already present that fails verification...'
	copy_function verifygpg verifygpg_orig
	copy_function gpg_first_fails verifygpg
	get_raw "$mid"

	(cd "$STOREDIR" && echo '6e53df5995b6cc423c7f2d63b6df52d5d7f70e8586c25f91433fd8a1a2466e77be6a38884bde8bedd9ff6e7deb0215a66e1c2a16e4955503c20445e649a5fb47 20160122055622.GA87581@FreeBSD.org.raw' | $SHASUM -a 512 -c)

	echo passed

	# If the file already exists, a "broken" wget won't cause
	# a problem
	echo 'Testing get_raw with file already present...'
	WGET=cmd_failure
	get_raw "$mid"

	echo passed

	# Test failure
	echo 'Testing get_raw fails w/ bad data...'
	WGET=cmd_failure
	rm "$STOREDIR/$mid.raw"

	# it should fail; a bare "! get_raw" would accept either outcome
	if get_raw "$mid"; then
		echo 'Test failed!'
		exit 1
	fi

	# and the desired file should not exist
	if [ -e "$STOREDIR/$mid.raw" ]; then
		echo 'Test failed!'
		exit 1;
	fi
	echo passed

	setdefaults
else
	echo "Unknown verb: $1"
	echo "Usage:"
	echo " $0 verify file ..."
	echo " $0 find"
	echo ""
	echo "The verify option will attempt to verify each file specified."
	echo ""
	echo "The find option will start up an interactive session to find"
	echo "and select the snapshot to download and verify."
fi

+ 14
- 0
splitbody.py View File

@@ -0,0 +1,14 @@
import itertools
import mailbox
import sys

def split_bodies(mbox_path, prefix):
    """Write each single-part message of an mbox to its own file.

    The output files are named ``prefix`` followed by a four digit
    counter starting at 0000.  Each one holds a ``Message-ID:`` line
    followed by the message payload.  Multipart messages are skipped and
    do not use up a number.

    mbox_path -- path of the mbox file to read
    prefix    -- path prefix of the files to write
    """
    cnt = itertools.count()
    mb = mailbox.mbox(mbox_path)
    try:
        for msg in mb.itervalues():
            body = msg.get_payload()
            if isinstance(body, list):
                # a list payload means a multipart message
                continue
            # next() and write() work on Python 2 and 3 alike, unlike
            # the cnt.next() method and the "print >>fp" statement
            with open('%s%04d' % (prefix, next(cnt)), 'w') as fp:
                fp.write('Message-ID: %s\n' % (msg['message-id'],))
                fp.write(body)
    finally:
        mb.close()


if __name__ == '__main__':
    split_bodies(sys.argv[1], sys.argv[2])

Loading…
Cancel
Save