# -*- coding: utf-8 -*-
import abc

from hyphenation_rule import HyphenationRule

class FinnishHyphenationRule(HyphenationRule):
    '''
    This class represents the set of hyphenation rules for the Finnish language.
    The class "implements" the interface HyphenationRule, but as it does not implement
    all the required methods, it is declared abstract.

    The class provides Finnish language specific services useful for the subclasses:
    vowel and diphthong tests, and searches for the next vowel or consonant in a word.
    '''

    # Python 2 style metaclass declaration.
    # NOTE(review): Python 3 ignores the __metaclass__ attribute; there the
    # abstractness depends on how HyphenationRule itself is declared.
    __metaclass__ = abc.ABCMeta

    # This constant can be used by any class implementing this interface to
    # inform about a position outside a given word. This value is never
    # returned by next_hyphen.
    OUT_OF_RANGE = -1

    # The Finnish vowels in upper case: A, E, I, O, U, Y, A with diaeresis
    # (\u00C4) and O with diaeresis (\u00D6).
    VOWELS = u'AEIOUY\u00C4\u00D6'

    # The upper-case letter pairs that are treated as diphthongs.
    _DIPHTHONG_PAIRS = (
        u"AI", u"EI", u"II", u"OI", u"UI", u"YI",
        u"\u00C4I",  # ÄI
        u"\u00D6I",  # ÖI
        u"AU", u"EU", u"EY", u"IE", u"IU", u"OU", u"UO",
        u"Y\u00D6",  # YÖ
        u"\u00C4Y",  # ÄY
        u"\u00D6Y",  # ÖY
    )

    # The diphthong set of the Finnish language. As this is a constant, the
    # set used to store it is defined in a constant. Note also the naming
    # convention for constants (ALL CAPITALS).
    #
    # When you use the diphthong set, remember first to use the proper string
    # methods to get your data upper case, as all the diphthongs are stored
    # that way. You can use the set to see whether any two letters form a
    # diphthong.
    DIPHTHONGS = set(_DIPHTHONG_PAIRS)

    # Backward-compatible alias for the attribute name this tuple used to
    # have. It shadows the builtin `list` inside the class body, so it is
    # deliberately defined last; new code should not use it.
    list = _DIPHTHONG_PAIRS

    # Some of you might have noticed that the Goblin has trouble with the
    # Scandinavian letters in the code. Goblin internally uses UTF-8 encoding
    # and expects all submitted files to be in this encoding. If you were shown
    # error messages this means that you most probably used either ISO-latin-1
    # or CP-1252 encoding.

    # In Python, characters can be presented with the basic Latin alphabet
    # by using unicode escapes. Our problematic letters are the following.

    # \u00C4 A with diaeresis
    # \u00D6 O with diaeresis

    # You can change your encoding in Eclipse by left-clicking the project icon
    # (in package explorer) and choosing
    # preferences -> resources -> text file encoding -> other -> UTF8

    def is_finnish_diphthong(self, first_letter, second_letter):
        '''
        Tests if the given pair of characters form a diphthong.

        The comparison is case-insensitive. Each argument must be exactly one
        character long; anything else (for example an empty string paired with
        a two-letter string) is never a diphthong.

        @param first_letter: the first letter of the diphthong candidate
        @param second_letter: the second letter of the diphthong candidate
        @return: True if the letters form a diphthong, otherwise False.
        '''
        # Without this guard ("", "ai") would concatenate to "AI" and be
        # accepted even though it is not a pair of letters.
        if len(first_letter) != 1 or len(second_letter) != 1:
            return False

        diphthong_candidate = first_letter.upper() + second_letter.upper()

        return diphthong_candidate in FinnishHyphenationRule.DIPHTHONGS

    def is_finnish_vowel(self, letter_to_check):
        '''
        Checks whether a character is a vowel in the Finnish language.

        The check is case-insensitive. Only single characters can be vowels:
        an empty string or a longer string always yields False.

        @param letter_to_check: character to check
        @return: is the input a vowel (True/False)
        '''
        # VOWELS is a string, so `in` is a substring test: the empty string
        # and runs such as "AE" would match unless the length is checked.
        if len(letter_to_check) != 1:
            return False

        return letter_to_check.upper() in FinnishHyphenationRule.VOWELS

    def get_next_vowel(self, word, start_from):
        '''
        Find the index of the next Finnish vowel starting from the given position (inclusive).

        @param word: the word to search from.
        @param start_from: the starting position (inclusive)
        @return: position of the next character that is a Finnish vowel (int),
                 or OUT_OF_RANGE if there is none or if start_from is negative.
        '''
        # A negative start is outside the word. Indexing with it would wrap
        # around to the end of the word and could yield a negative "position".
        if start_from < 0:
            return FinnishHyphenationRule.OUT_OF_RANGE

        for position in range(start_from, len(word)):
            if self.is_finnish_vowel(word[position]):
                return position

        return FinnishHyphenationRule.OUT_OF_RANGE

    def get_next_consonant(self, word, start_from):
        '''
        Find the index of the next Finnish consonant starting from the given position (inclusive).

        Every character that is not a Finnish vowel counts as a consonant here,
        including digits, spaces and punctuation.

        @param word: the word to search from.
        @param start_from: the starting position (inclusive)
        @return: position of the next character that is a Finnish consonant (int),
                 or OUT_OF_RANGE if there is none or if start_from is negative.
        '''
        # A negative start is outside the word. Indexing with it would wrap
        # around to the end of the word and could yield a negative "position".
        if start_from < 0:
            return FinnishHyphenationRule.OUT_OF_RANGE

        for position in range(start_from, len(word)):
            if not self.is_finnish_vowel(word[position]):
                return position

        return FinnishHyphenationRule.OUT_OF_RANGE
