B
    b[àb×  ã               @   sê   d dl mZ d dlm  mZ d dlZd dlZe e	dƒ¡Z
e
d d e de
d d  d ¡e
d d	 e de
d d	  d ¡iZeed
œdd„Zejdœdd„Zdd„ Zejdœdd„Zee ejedœdd„Zeejdœdd„ZdS )é    )ÚListNzflibl_config.jsonÚ	languagesÚmain_languagez([^Zvalid_charactersz])Úchild_language)ÚphraseÚlgc             C   sz   | rrdd„ dd„ t |  | ¡D ƒD ƒ}g }d}x>|D ]6}|dkrJ||7 }q4|r`| d| ¡ d}| |¡ q4W |S g S dS )z÷Tokenize an utterance based on specified word-forming characters.

    Parameters:
        phrase: the string to be tokenized
        lg: the language whose word-forming characters are to be used in tokenization
    
    Return list of tokens
    c             S   s   g | ]}|r|‘qS © r   )Ú.0Újr   r   ú-/home/sunny/Documents/lx/flexible/flexible.pyú
<listcomp>   s    ztokenize.<locals>.<listcomp>c             S   s   g | ]}|  ¡ ‘qS r   )Ústrip)r	   Úir   r   r   r      s    Ú Ú.ú N)Úword_formingÚsplitÚappend)r   r   ZtokensZcollected_tokensZpunct_charsÚtokenr   r   r   Útokenize   s    	"

r   )Úelc          
   C   s(   t d| jdt| jƒd| jdt| ƒƒ dS )z„Print the tag, attributes, text, and number of children of an ET.Element
    
    Parameters:
        el: Element to be printed
    zTag:z
Attrs:z
Text:z
No. of Children:N)ÚprintÚtagÚstrÚattribÚtextÚlen)r   r   r   r   Úprint_el_info)   s    r   c              C   s¢   t tddd ¡ ƒd } t| ƒdd… }ddt|ƒ  | }|dd	… d
 |d	d…  d
 |dd…  d
 |dd…  d
 |dd…  }tddd t| ƒ¡ |S )zšGenerate FLEx guid based on offset defined in offset.txt

    Increments offset upon use.

    Return guid in format [0-f]{8}-([0-f]{4}-){3}[0-f]{12}
    z
offset.txtÚr)Úmodeé   é   NÚ0é    é   ú-é   é   é   Úw)ÚintÚopenÚreadÚhexr   Úwriter   )Zglobal_offsetZnew_guid_numZnew_guid_strZnew_guidr   r   r   Úgenerate_guid1   s    Lr0   )Úeaf_rootc             C   s   dd„ |   d¡D ƒS )zÈGet time IDs and values from an EAF file
    
    Parameters:
        eaf_root: is the root element of an EAF object parsed through ElementTree

    Return a dictionary of time ID and value pairs
    c             S   s   i | ]}|j d  |j d “qS )Z
TIME_VALUEZTIME_SLOT_ID)r   )r	   r   r   r   r   ú
<dictcomp>H   s   ztime_values.<locals>.<dictcomp>z.//TIME_SLOT)Úfindall)r1   r   r   r   Útime_values@   s    r4   )Útokenized_uttÚ	phrase_elr   c       	      C   s„   t  d¡}t| }xb| D ]Z}t jddtƒ id}| |¡r@d}nd}t jd||dœd}||_| |¡ | |¡ qW | |¡ d	S )
aš  Populate a phrase element with a tokenized utterance

    *tokenized_utt* is a list with tokens from an utterance
    *phrase_el* is the element that will be the parent to the words added (below the words el in the phrase_el will be the items w/ translations and notes)
    *lg* is the language whose word-forming characters are to be used in tokenization
    
    Makes changes in place (returns nothing)
    ÚwordsÚwordÚguid)r   ÚpunctZtxtÚitem)ÚtypeÚlangN)ÚETÚElementr   r0   Úsearchr   r   )	r5   r6   r   r7   Zutterance_word_formingr   r8   r<   Ztoken_elr   r   r   Úadd_word_elL   s    	



rA   )Ú
constraintÚeafc             C   sH   xB|  d | ¡¡D ].}x(|  d |jd ¡¡D ]}| |¡ q.W qW dS )zºRemove the tiers that use the "Included In" stereotype constraint

    Parameters:
        eaf: The root element of an EAF file
    
    Makes changes in place (returns nothing)    
    z.//*[@CONSTRAINTS='{}']z.//*[@LINGUISTIC_TYPE_REF='{}']ZLINGUISTIC_TYPE_IDN)r3   Úformatr   Úremove)rB   rC   Zconstraint_tierZbad_tierr   r   r   Úremove_constrainte   s    rF   )Útypingr   Úxml.etree.ElementTreeÚetreeÚElementTreer>   ÚjsonÚreÚloadr,   ÚconfigÚcompiler   r   r   r?   r   r0   r4   rA   rF   r   r   r   r   Ú<module>   s   "&