#! /usr/bin/env python
# -*- coding: utf-8 -*-
# NOTE(review): the declaration above assumes this file is stored as UTF-8 -
# confirm. Without a declared encoding, Python 2.5+ rejects the non-ASCII
# characters used in this file (PEP 263).
#
# The module docstring is kept in German because usage() prints it to the
# user. In English: "Script to count the frequencies of so-called filler
# words."
"""Skript um die Häufigkeiten von sogenannten Flickwörtern zu zählen."""

import sys, os, getopt, pipes
import tempfile

# Home-grown boolean constants. Note that TRUE is -1, not 1; in this file the
# flags below are only ever tested for truthiness.
FALSE = 0
TRUE = -1

# Runtime switches, turned on by the command-line option parsing further down.
debug = FALSE
verbose = FALSE

# Filler words to look for; load_wordlist() appends to this list in place.
wordlist = []

# File holding the filler words, one per line ('#' starts a comment line).
wordlistfilename = "/home/dune73/etc/flickwoerter"


def version():
    """Print the program version and author to standard output."""
    # Single-argument call form: behaves exactly like the print statement on
    # Python 2 and is also valid on Python 3.
    print("Version 0.1 - Written by Christian Folini.")

def usage():
    """Print the module docstring followed by the command-line help.

    Everything goes to standard output. The program name shown in the
    synopsis is taken from sys.argv[0].
    """
    # Every print below takes exactly one argument, so the output is the same
    # whether "print" is the Python 2 statement or the Python 3 function.
    # Blank lines use print("") because print() would emit "()" on Python 2.
    print(__doc__)
    print("")
    print("Benutzung: " + sys.argv[0] + " [OPTIONS]")
    print("")
    print(" -d --debug\t  debug flag")
    print(" -v --verbose      Mehr Informationen zum Programmablauf und eine Zusammenfassung der Resultate")
    print(" -h --help         Hilfe")
    print(" -u --usage        dito")
    print(" -?                dito")
    print("    --version      Version ausgeben")
    print("")
    print("Die Resultate können in 'sort -n' gepiped werden um numerisch sortiert zu werden.")


# Parse the command line when the module is loaded. The non-option arguments
# end up in the module-level "args" list, which process() reads as the files
# to scan.
try:
    opts, args = getopt.getopt(
        sys.argv[1:], 'dhu?v',
        ['debug', 'verbose', 'usage', 'help', 'version'])
except getopt.GetoptError as err:
    # Unknown option: report it and show the help text instead of dying with
    # a traceback. ("except ... as" needs Python 2.6 or newer.)
    sys.stderr.write(str(err) + "\n")
    usage()
    sys.exit(2)

# Options are handled in the order given, so e.g. "-d -h" still switches
# debugging on before the help text is printed and the program exits.
for name, _value in opts:
    # None of the options is declared with an argument, so _value is unused.
    if name in ('-d', '--debug'):
        debug = TRUE
    if name in ('-v', '--verbose'):
        verbose = TRUE
    if name in ('-h', '-u', '-?', '--help', '--usage'):
        usage()
        sys.exit()
    if name == '--version':
        version()
        sys.exit()

def dprint(text1, text2=''):
    """Print a debug message, but only when the global ``debug`` flag is set.

    text1 -- first part of the message; printed on its own when text2 is
             falsy (which includes the default).
    text2 -- optional second part, printed after text1 and separated from
             it by a single space.
    """
    if not debug:
        return
    if not text2:
        print(text1)
    else:
        # One pre-formatted argument keeps this line valid, with the same
        # output, for both the Python 2 print statement and the Python 3
        # print function.
        print("%s %s" % (text1, text2))
			
def vprint(text1, text2=''):
    """Print a message, but only when the global ``verbose`` flag is set.

    text1 -- first part of the message; printed on its own when text2 is
             falsy (which includes the default).
    text2 -- optional second part, printed after text1 and separated from
             it by a single space.
    """
    if not verbose:
        return
    if not text2:
        print(text1)
    else:
        # One pre-formatted argument keeps this line valid, with the same
        # output, for both the Python 2 print statement and the Python 3
        # print function.
        print("%s %s" % (text1, text2))

def pipe(command):
    """Run a shell command and return everything it wrote to STDOUT.

    command -- command line; it is handed to the shell, so pipelines and
               redirections work.

    Returns the captured standard output as a string, trailing newline
    included. The command sees an immediately closed pipe on standard input,
    its standard error is inherited from this process, and its exit status
    is ignored.
    """
    # Imported here so the function does not rely on the module's import list.
    import subprocess

    # Reading stdout through a pipe removes the need for a temporary file.
    # A name from tempfile.mktemp() is race-prone, and the file would be left
    # behind whenever reading it back failed.
    child = subprocess.Popen(command, shell=True,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             universal_newlines=True)
    # communicate() closes stdin, reads stdout up to EOF and waits for the
    # child to finish.
    output, _unused_stderr = child.communicate()
    return output


def load_wordlist():
    """Read the filler-word file into the module-level ``wordlist``.

    The file named by ``wordlistfilename`` holds one entry per line. Empty
    lines and lines starting with '#' are skipped; everything else is
    appended to ``wordlist`` without its line ending. Other whitespace is
    kept.

    Returns the same (global) list for convenience. If the file cannot be
    opened, a message is printed and the program exits with status 1.
    """
    dprint("Wordliste aus Datei laden:  ", wordlistfilename)
    try:
        handle = open(wordlistfilename, "r")
    except IOError:
        # Only I/O failures are handled here; a bare "except" would also
        # swallow things like KeyboardInterrupt.
        print("Datei " + wordlistfilename + " nicht gefunden. Pfad im script (=in diesem Programm) anpassen oder Datei installiertn.")
        print("Abbruch.")
        sys.exit(1)

    # The with-block guarantees the file is closed again.
    with handle:
        for line in handle:
            # Strip only the line ending. Slicing off the last character
            # would truncate the final word of a file that does not end in
            # a newline.
            entry = line.rstrip("\r\n")
            if not entry or entry.startswith("#"):
                continue
            wordlist.append(entry)

    dprint("Wortliste: ", wordlist)
    return wordlist

def process():
    """Count the filler words in every file named on the command line.

    Reads the module-level ``args`` (file names) and ``wordlist`` (filler
    words). Each word is looked up in each file with ``grep -c``, so the
    figure reported is the number of matching *lines*, not of individual
    occurrences.

    Prints one "count word" line per file/word pair, followed by an empty
    line. In verbose mode it also prints the total number of filler-word
    hits, the total number of words over all files, and their ratio.
    """
    try:
        from shlex import quote      # Python 3
    except ImportError:
        from pipes import quote      # Python 2

    def to_int(text):
        """Parse command output as an integer; unusable output counts as 0."""
        try:
            return int(text.strip())
        except ValueError:
            # grep/wc print nothing when the file cannot be read.
            return 0

    def sub_process(word, filename):
        """Return [word, number of lines in filename matching word]."""
        # quote() keeps blanks and quote characters in words and file names
        # from being interpreted by the shell. "-e" and "--" stop grep from
        # treating a leading "-" as an option.
        command = ("grep -c -e " + quote(word) + " -- " + quote(filename)
                   + " 2>/dev/null")
        return [word, to_int(pipe(command))]

    def wordcount(filename):
        """Return the number of words in filename according to wc."""
        return to_int(pipe("wc -w < " + quote(filename)))

    counter = []
    total_words = 0
    for filename in args:
        vprint("Processing file", filename)
        # Add up the words of every file. Counting only the last one would
        # distort the ratio, because the filler-word total covers all files.
        total_words = total_words + wordcount(filename)
        for word in wordlist:
            counter.append(sub_process(word, filename))

    for word, count in counter:
        print("%s %s" % (count, word))
    print("")

    total_fill = sum([count for _word, count in counter])
    if total_fill:
        ratio = str(float(total_words) / total_fill)
    else:
        # No filler word found (or no input at all): there is no ratio.
        ratio = "-"
    vprint("Summe der Flickwörter           : " + str(total_fill))
    vprint("Summe der Wörter                : " + str(total_words))
    vprint("Verhältnis Wörter / Flickwörter : " + ratio)
    vprint("(Höhere Werte bedeuten weniger Flickwörter.)")
def main():
    """Load the filler-word list, then count and report the matches."""
    # load_wordlist() fills the module-level wordlist in place, so its result
    # is deliberately not bound to a local name that would shadow the global.
    load_wordlist()
    process()
			
# Start the program when this file is run as a script.
if "__main__" == __name__:
    main()
