#!/bin/bash
#
# Author: Maurice de la Ferté <maurice.ferte@basyskom.de>
# Version: 0.1
# Date: 2011-02-26
#

# usage
#
# Writes the help text to stdout: an empty line, the synopsis built from the
# name the script was called with (${0}) and a short description.
usage() {
	printf '\nUsage: %s <FILE 1> <FILE 2> [<FILE N>]\n' "${0}"
	printf '%s\n' \
		'    A script to compare the content of several text files. The differences' \
		'    between files like e.g. package lists will be shown with the related file' \
		'    name. This script will group the matching first line entries (whitespace' \
		'    separeted) to blocks.'
}

# comparison <FILE1> <FILE2> <OUTPUT>
#
# Appends the lines that differ between FILE1 and FILE2 to OUTPUT. Every line
# is written without its diff marker, followed by two tabs and the name of the
# file it was found in, in parentheses. All lines found only in FILE2 are
# written first, then all lines found only in FILE1. OUTPUT is created
# (possibly empty) even if the files do not differ.
function comparison() {
	local FILE1=${1}
	local FILE2=${2}
	local OUTPUT=${3}

	# The line counts announced by every "@@ -a,b +c,d @@" hunk header are
	# used to tell hunk content from the "---"/"+++" file headers, so that
	# changed lines whose own text starts with "-" or "+" are kept.
	# The file names are handed to awk through the environment; they are
	# printed verbatim whatever characters they contain.
	diff -U 1 -dHbrN -- "${FILE1}" "${FILE2}" | \
		CMP_OLD="${FILE1}" CMP_NEW="${FILE2}" awk '
			# number of lines covered by a hunk range like "-3,2" or "+7"
			function hunk_len(range,    parts) {
				return (split(range, parts, ",") > 1) ? parts[2] + 0 : 1
			}

			# outside of a hunk only a hunk header is of interest
			old_left <= 0 && new_left <= 0 {
				if ($1 == "@@" && $2 ~ /^-[0-9]/ && $3 ~ /^[+][0-9]/) {
					old_left = hunk_len($2)
					new_left = hunk_len($3)
				}
				next
			}

			# inside of a hunk: the first character is the diff marker
			{
				marker = substr($0, 1, 1)
				text = substr($0, 2)
				if (marker == "+") {
					new_left--
					print text "\t\t(" ENVIRON["CMP_NEW"] ")"
				} else if (marker == "-") {
					# held back until END to keep the FILE2 lines first
					old_left--
					removed[++count] = text "\t\t(" ENVIRON["CMP_OLD"] ")"
				} else if (marker != "\\") {
					# context line, it belongs to both files
					old_left--
					new_left--
				}
				# a "\" marker is the missing newline note, it is skipped
			}

			END {
				for (i = 1; i <= count; i++)
					print removed[i]
			}
		' >> "${OUTPUT}"
}

# blockseparation <INPUT> <OUTPUT>
#
# Appends the lines of INPUT to OUTPUT and puts a "--" separator line in front
# of every line whose first whitespace separated entry differs from the one of
# the line before. Leading and trailing whitespace is dropped and runs of
# whitespace inside a line are written as one single space (this is the
# format the script has always printed). OUTPUT is created even if INPUT
# contains no lines.
function blockseparation() {
	local INPUT=${1}
	local OUTPUT=${2}
	# default field separators, independent of the caller's IFS
	local IFS=$' \t\n'
	local -a words
	local pattern
	# local, so that a former call cannot suppress the first separator
	local pattern_before=""

	# read -r -a splits a line at whitespace without expanding glob
	# characters and without eating backslashes. The second test keeps a
	# last line that has no trailing newline.
	while read -r -a words || [ "${#words[@]}" -gt 0 ]; do
		pattern=${words[0]-}
		if [ "${pattern}" != "${pattern_before}" ]; then
			printf '%s\n' "--"
		fi
		pattern_before=${pattern}

		# "${words[*]}" joins the entries with one space each
		printf '%s\n' "${words[*]}"
	done < "${INPUT}" >> "${OUTPUT}"
}

# some checks

# at least two files are needed for a comparison
if [ "${#}" -lt 2 ]; then
	echo "" >&2
	echo "${0}: Error, more arguments needed" >&2
	usage >&2
	exit 1
fi

# every argument has to be an existing regular file; "${@}" is quoted so that
# names with whitespace or glob characters are checked exactly as given
for file in "${@}" ; do
	if [ ! -f "${file}" ]; then
		echo "" >&2
		echo "${0}: Error, FILE ${file} not found" >&2
		usage >&2
		exit 1
	fi
done

#       the compare logic
#
#    FILE 1   <--->    FILE N (N > 2)
#    FILE 1   <--->    FILE 2
#    FILE 2   <--->    FILE 3
#     ...     <--->     ...
#  FILE N-1   <--->    FILE N

# scratch directory for all intermediate files; without it nothing below
# can work, so give up right away if it cannot be created
TEMPDIR=$(mktemp -d) || {
	echo "${0}: Error, cannot create temporary directory" >&2
	exit 1
}
# remove the scratch directory on every way out of the script
trap 'rm -rf -- "${TEMPDIR}"' EXIT

# last positional parameter, taken as is (no word splitting)
LASTFILE=${!#}
COMPARE_OUTPUT=${TEMPDIR}/compare

# in case of N > 2 compare first with last file
if [ "${#}" -ne 2 ]; then
	comparison "${1}" "${LASTFILE}" "${COMPARE_OUTPUT}"
fi

# compare every file with its successor
while [ -n "${2}" ] ; do
	# skip comparing of same files
	if [ "${1}" != "${2}" ]; then
		comparison "${1}" "${2}" "${COMPARE_OUTPUT}"
	fi
	shift
done

# data processing
# COMPARE_OUTPUT is missing if no comparison took place and empty if the
# compared files do not differ; in both cases there is nothing to print
if [ -s "${COMPARE_OUTPUT}" ]; then
	sort -u -- "${COMPARE_OUTPUT}" > "${TEMPDIR}/unique"
	blockseparation "${TEMPDIR}/unique" "${TEMPDIR}/beautified"
	cat -- "${TEMPDIR}/beautified"
fi
