#!/usr/bin/python
# -*- coding: utf-8 -*-

###########################################################################
#    OCRFeeder - The complete OCR suite
#    Copyright (C) 2009 Joaquim Rocha
# 
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
###########################################################################

import sys
import os
import gtk
import math
from ocrfeeder.util.cliutils import ArgsRetriever
from ocrfeeder.util.graphics import getBoundsFromStartEndPoints, convertPixbufToImage, getTextSizeFromImage
from ocrfeeder.feeder.ocrEngines import Engine, OcrEnginesManager
from ocrfeeder.studio.configuration import ConfigurationManager
from ocrfeeder.studio.dataHolder import DataBox, PageData, TEXT_TYPE, IMAGE_TYPE
from ocrfeeder.feeder.imageManipulation import *
from ocrfeeder.feeder.layoutAnalysis import *
from ocrfeeder.feeder.documentGeneration import OdtGenerator, HtmlGenerator

def printError(message):
	"""Print ``message`` followed by a newline on standard output and exit.

	The process is terminated through ``sys.exit(1)``, so this function
	never returns to its caller.

	``sys.stdout.write`` is used instead of the ``print`` statement so that
	the definition is valid syntax on both Python 2 and Python 3.
	"""
	# Wrapping in a 1-tuple keeps tuple messages from being expanded by '%'.
	sys.stdout.write('%s\n' % (message,))
	sys.exit(1)

# Parse the command line: --help, --images, --window-size, --format and --o.
cli_command_retriever = ArgsRetriever(sys.argv)

if cli_command_retriever.hasCommand('--help'):
	# Asking for the usage text is not an error: write it to stdout and
	# leave with a success status instead of going through printError().
	sys.stdout.write('Usage:\n'
		'\tocrfeeder-cli --o OUTPUT_FILE [OPTIONS]\n\n'
		'Required options:\n\n'
		'\t--o OUTPUT_FILE\n'
		'\t\tName for the generated file\n\n'
		'Other options:\n\n'
		'\t--images IMAGE1 IMAGE2 ...\n'
		'\t\tImages to be recognized\n\n'
		'\t--format FORMAT\n'
		'\t\tFormat for the generated document (ODT or HTML)\n\n'
		'\t--window-size WINDOW_SIZE\n'
		'\t\tDesired window size of OCRFeeder\'s segmentation\n'
		'\t\talgorithm. If this option is not used, the window\n'
		'\t\tsize will be calculated automatically\n')
	sys.exit(0)

images = cli_command_retriever.getParams('--images')
export_format = cli_command_retriever.getParams('--format')

# getParams() results are treated as sequences of raw argument strings in
# this script (they are iterated and pop()-ed), so the window size has to be
# converted to a number before it is handed to the segmentation code.
# None means "not given", i.e. let the window size be calculated
# automatically, as described in the --help text.
# NOTE(review): assumes ImageProcessor expects a numeric window size --
# confirm against ocrfeeder.feeder.imageManipulation.
window_size = None
window_size_params = cli_command_retriever.getParams('--window-size')
if window_size_params:
	try:
		window_size = int(window_size_params[0])
	except ValueError:
		printError("Error: invalid window size '%s', it must be an integer."
			% window_size_params[0])
	if window_size <= 0:
		printError('Error: the window size must be greater than zero.')

file_name = cli_command_retriever.getParams('--o')
if not file_name:
	# The hint must name this very command (ocrfeeder-cli).
	printError('Usage: ocrfeeder-cli --o OUTPUT_FILE [OPTIONS]\n'
		'Try \'ocrfeeder-cli --help\' for more information')

# Keep a single output name: the last value given to --o.
file_name = file_name.pop()

# Build the OCR engines from the folder given by the configuration
# manager's ``user_engines_folder``.
configuration_manager = ConfigurationManager()
ocr_engines_manager = OcrEnginesManager(configuration_manager)
ocr_engines_manager.makeEnginesFromFolder(configuration_manager.user_engines_folder)
ocr_engines = ocr_engines_manager.ocr_engines

# Nothing can be recognized without at least one engine.
if not ocr_engines:
	printError('Error: unable to initialise OCR engine.')

# XXX: which engine to choose when there's more than one?
# For now the first one is used. The recognition code further down unpacks
# this entry as an (engine, engine_file) pair.
engine = ocr_engines[0]

def createDataBoxFromBlock(block, image, window_size):
	"""Return a DataBox for ``block`` carrying the matching clip of ``image``.

	block -- object whose ``translateToUnits(window_size)`` yields four
	         values, read here as left x, top y, right x and bottom y.
	image -- object offering ``size`` and ``crop`` (this script passes the
	         result of ``Image.open``).
	window_size -- forwarded unchanged to ``block.translateToUnits``.

	The right and bottom edges are limited to ``image.size[0]`` and
	``image.size[1]``. The box geometry comes from
	``getBoundsFromStartEndPoints`` while the clip itself is cropped at the
	coordinates truncated with ``int()``.
	"""
	left, top, right, bottom = block.translateToUnits(window_size)
	start_point = (left, top)
	# Never let the far corner fall outside of the image.
	end_point = (min(right, image.size[0]), min(bottom, image.size[1]))
	x, y, width, height = getBoundsFromStartEndPoints(start_point, end_point)
	crop_box = tuple(int(coordinate) for coordinate in start_point + end_point)
	clip = image.crop(crop_box)
	return DataBox(x, y, width, height, clip)


# Recognize every input image and export the resulting pages.

# Check all the inputs up front so a bad path is reported before any
# recognition work is done on the images that precede it.
for image in images:
	if not os.path.isfile(image):
		printError("Error: The image '%s' is not a file or does not exist." % image)

# The selected engine entry is an (engine, engine_file) pair. It must be
# unpacked once, outside of the per-image loop: unpacking it into the same
# name inside the loop made the second image try to unpack the engine
# object itself.
ocr_engine, engine_file = engine

pages = []
for image in images:
	page_data = PageData(image)
	data_boxes = []
	image_obj = Image.open(image)
	image_processor = ImageProcessor(image, window_size)
	block_retriever = BlockRetriever(image_processor.imageToBinary())
	blocks = block_retriever.getAllBlocks()
	for block in blocks:
		# The processor's own window_size is used here because the one from
		# the command line may be None.
		data_box = createDataBoxFromBlock(block, image_obj, image_processor.window_size)
		ocr_engine.setImage(data_box.image)
		text = ocr_engine.read()
		# Named box_type so the builtin type() is not shadowed.
		box_type = ocr_engine.classify(text)
		if box_type == TEXT_TYPE:
			# NOTE(review): the text size is measured on the whole page
			# image for every block; data_box.image (the block's own clip)
			# may have been intended -- confirm before changing.
			text_size = getTextSizeFromImage(Image.open(image))
			if text_size:
				# Scale by 72 over the page's vertical resolution
				# (presumably pixels to points -- confirm the units).
				y_resolution = float(page_data.resolution[1])
				text_size /= y_resolution
				text_size *= 72.0
				data_box.setFontSize(math.floor(text_size))
			data_box.setText(text)
		data_box.setType(box_type)
		data_boxes.append(data_box)
	page_data.data_boxes = data_boxes
	pages.append(page_data)

# ODT is the default output; only build the generator that is really used.
if 'HTML' in export_format:
	document_generator = HtmlGenerator(file_name)
else:
	document_generator = OdtGenerator(file_name)
for page in pages:
	document_generator.addPage(page)
document_generator.save()
