B
    Yb!                 @   s   d dl mZ d dlZd dlm  mZ eedddZej	dddZ
d	d
 Zej	dddZee ej	dddZej	dddZdS )    )ListN)phraselgc             C   sV   |dkrt d}n|dkr(t d}ntd| rRdd dd || D D S g S )	zTokenize an utterance based on specified word-forming characters.

    Parameters:
        phrase: the string to be tokenized
        lg: the language whose word-forming characters are to be used in tokenization
    
    Return list of tokens
    tcaz([^0-9A-Za-z\+#_])Zmtou   ([^A-Za-zäëöü'])z/Language either not given or an invalid string.c             S   s   g | ]}|r|qS  r   ).0jr   r   //home/sunny/Documents/lx/flexible/flexible01.py
<listcomp>   s    ztokenize.<locals>.<listcomp>c             S   s   g | ]}|  qS r   )strip)r   ir   r   r	   r
      s    )recompile
ValueErrorsplit)r   r   Znon_word_formingr   r   r	   tokenize   s    
r   )elc          
   C   s(   t d| jdt| jd| jdt|  dS )zPrint the tag, attributes, text, and number of children of an ET.Element
    
    Parameters:
        el: Element to be printed
    zTag:z
Attrs:z
Text:z
No. of Children:N)printtagstrattribtextlen)r   r   r   r	   print_el_info   s    r   c              C   s   t tddd d } t| dd }ddt|  | }|dd	 d
 |d	d  d
 |dd  d
 |dd  d
 |dd  }tdddt|  |S )zGenerate FLEx guid based on offset defined in offset.txt

    Increments offset upon use.

    Return guid in format [0-f]{8}-([0-f]{4}-){3}[0-f]{12}
    z
offset.txtr)mode      N0       -         w)intopenreadhexr   writer   )Zglobal_offsetZnew_guid_numZnew_guid_strZnew_guidr   r   r	   generate_guid#   s    Lr+   )eaf_rootc             C   s   dd |  dD S )zGet time IDs and values from an EAF file
    
    Parameters:
        eaf_root: is the root element of an EAF object parsed through ElementTree

    Return a dictionary of time ID and value pairs
    c             S   s   i | ]}|j d  |j d qS )Z
TIME_VALUEZTIME_SLOT_ID)r   )r   r   r   r   r	   
<dictcomp>>   s   ztime_values.<locals>.<dictcomp>z.//TIME_SLOT)findall)r,   r   r   r	   time_values2   s    r/   )tokenized_utt	phrase_elc             C   s   t d}xl| D ]d}t jddt id}td}||rBd}nd}t jd|d	d
d}||_|| || qW || dS )aD  Populate a phrase element with a tokenized utterance

    *tokenized_utt* is a list with tokens from an utterance
    *phrase_el* is the element that will be the parent to the words added (below the words el in the phrase_el will be the items w/ translations and notes)
    
    Makes changes in place (returns nothing)
    wordswordguid)r   z([^0-9A-Za-z\+#_])punctZtxtitemztca-fonipa-x-etic)typelangN)ETElementr+   r   r   searchr   append)r0   r1   r2   tokenr3   Zword_formingr7   Ztoken_elr   r   r	   add_word_elB   s    




r>   )eafc             C   sJ   xD|  dD ]6}x0|  dd|jd  d D ]}| | q0W qW dS )zRemove the tiers that use the "Included In" stereotype constraint

    Parameters:
        eaf: The root element of an EAF file
    
    Makes changes in place (returns nothing)    
    z .//*[@CONSTRAINTS='Included_In']z.//*[@LINGUISTIC_TYPE_REF={}]'ZLINGUISTIC_TYPE_IDN)r.   formatr   remove)r?   Zincluded_inZbad_tierr   r   r	   remove_included_in[   s    $rC   )typingr   r   xml.etree.ElementTreeetreeElementTreer9   r   r   r:   r   r+   r/   r>   rC   r   r   r   r	   <module>   s   	