#!/bin/sh
# tweak
# Does some cheeky post-processing of the transcription results.

# tweak_transcripts
# Rewrites "$NUMEN_STATE_DIR/transcripts" in place.  Each input line is
# emitted as its tweaked form followed by its original form, and any line
# identical to one already emitted is dropped (first occurrence wins), so
# an untouched line appears once.  Only lines of fewer than five words are
# tweaked; longer lines pass through unchanged.
#
# Environment:
#   NUMEN_STATE_DIR  directory containing the transcripts file (required)
#   numen_fmt        if it contains "sentence", "title" or "upper", a
#                    leading "the " is kept; otherwise it is stripped
#
# Returns non-zero and leaves the file untouched when NUMEN_STATE_DIR is
# unset, the file is not a readable regular file, or awk fails.  As the
# call below is the last command, that is also the exit status of the script.
tweak_transcripts() {
	if [ -z "${NUMEN_STATE_DIR:-}" ]; then
		printf '%s\n' 'tweak: NUMEN_STATE_DIR is not set' >&2
		return 1
	fi
	# No "local" in POSIX sh, so the names are prefixed to avoid clashes.
	tweak_file=$NUMEN_STATE_DIR/transcripts
	if [ ! -f "$tweak_file" ] || [ ! -r "$tweak_file" ]; then
		printf 'tweak: cannot read %s\n' "$tweak_file" >&2
		return 1
	fi

	# The whole result is captured before the file is reopened for writing,
	# because the redirection further down truncates it.
	tweak_out=$(awk '
	# Replace phrase a with b where a is the whole line, or a whole
	# leading or trailing phrase of it.  Occurrences in the middle of
	# the line are left alone.  a is used as a regular expression.
	function rep(a, b) {
		gsub("^"a"$", b); gsub("^"a" ", b" "); gsub(" "a"$", " "b)
	}
	# Remember the untouched line so it can be emitted after the tweaked one.
	{ line = $0 }
	NF < 5 {
		# nudge: steer common mishearings towards the intended word.
		# Longer phrases come before their own prefixes or suffixes.
		rep("or do you", "audio")
		rep("or the you", "audio")
		rep("or do", "audio")
		rep("bite", "byte")
		rep("bites", "bytes")
		rep("cash", "cache")
		rep("flute", "float")
		rep("mt", "empty")
		rep("colonel", "kernel")
		rep("mean", "main")
		rep("notify sand", "notify send")
		rep("oh to put", "output")
		rep("oh put", "output")
		rep("to put", "output")
		rep("oh to pit", "output")
		rep("oh to poop", "output")
		rep("oh poop", "output")
		rep("freeze", "phrase")
		rep("freezes", "phrases")
		rep("read direct", "redirect")
		rep("sighs", "size")
		# Only a leading "turn" becomes "return".
		sub(/^turn$/, "return"); sub(/^turn /, "return ")
		rep("heidi", "tidy")
		rep("warren", "warn")
		# A leading "right" becomes "write" unless the line contains
		# down, up or left anywhere (substring match).
		if (!/down|up|left/) sub(/^right /, "write ")

		# unknown words: presumably terms the recognizer cannot produce,
		# rebuilt from the phrases it outputs instead.
		rep("the limit her", "delimiter")
		rep("limit her", "delimiter")
		rep("separate her", "separator")
		rep("separate tour", "separator")
		rep("to pull", "tuple")
		rep("to poop", "tuple")
		rep("drupal", "tuple")

		# Strip a leading article unless one of these formats is active.
		if (ENVIRON["numen_fmt"] !~ /sentence|title|upper/)
			sub(/^the /, "")
	}
	# Tweaked line first, then the original, skipping anything already
	# printed (same result as piping through a separate dedupe pass).
	{
		if (!seen[$0]++) print
		if (!seen[line]++) print line
	}
	' "$tweak_file") || return

	# Command substitution stripped trailing newlines, so exactly one is
	# added back; an empty result therefore still writes one empty line.
	printf '%s\n' "$tweak_out" > "$tweak_file"
}

tweak_transcripts
