U
    J6Be"                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZ G dd	 d	eZG d
d dZdS )    N)AttrDict)tqdm)evaluate_functional_correctness)AutoTokenizerAutoModelForCausalLMStoppingCriteriaStoppingCriteriaList)MBPPDataset)cleanup_codec                   @   s*   e Zd Zdd ZejejedddZdS )KeywordsStoppingCriteriac                 C   s    t |  g | _|| _|| _d S )N)r   __init__current_context	tokenizerkeywords_str)selfr   r    r   \/weka-jd/prod/containers/zhuqihao/dev-cpu/upload_code/deepseek-coder/Evaluation/MBPP/mbpp.pyr      s    
z!KeywordsStoppingCriteria.__init__)	input_idsscoresreturnc                 K   sD   | j |d d   | j| j }| jD ]}||kr, dS q,dS )Nr   TF)r   appenditemr   decoder   )r   r   r   kwargsr   wordr   r   r   __call__   s    
z!KeywordsStoppingCriteria.__call__N)	__name__
__module____qualname__r   torch
LongTensorFloatTensorboolr   r   r   r   r   r      s   r   c                   @   s:   e Zd ZdZdddZe dd Zdd Zdd Z	dS )MBPPz 
    MBPP evaluation class.
       python      Nr   Fffffff? T(      c              
   C   s   || _ || _|| _|| _|| _|| _|| _|| _|| _|| _	|	| _
|d dd| _|| _tj| jdd |d}ztj|ddd| _W n2 tk
r } zt| dstW 5 d }~X Y nX d S )	N
model_path/_T)exist_okcls)trust_remote_codeF)	data_rootmax_seq_lenmax_gen_len
batch_sizekn_samplelanguagelog_dirZsfttemperaturetop_preplace
model_nameinference_incrementosmakedirspopr   from_pretrainedr   	ExceptionprintAssertionError)r   r3   r4   r9   r5   r6   r:   r;   Zissftr<   r>   r?   tokenizer_cfgr8   Zk_sampleZtokenizer_clser   r   r   r   &   s(    
zMBPP.__init__c           -      C   s  | j dk	stdt| j| jd}t|| j }|j}|j}| jdkrX| jdksXtd|	  t
t||}|| }g }	|D ](}
t| jD ]}|	|
| j |  qqt|	}d}tj| j | j d| d	| j d
| j d}t|d}d}t }ttdt|	| jD ]}g }g }g }g }g }g }|	||| j  D ]8}|| }|j}|d }d|d }||d  d| d| d}d}tt|d ddD ]D}|| | } t| j| }!|!| j| j krڐqn
| } qq|dkrD|}| j|}"z&| j|"d| j| j  }W qDW n   |"d Y nX q|| ||d  |t| ||d  q@| j|ddd}#|#d |jdd| j df }$|#d |jdd| j df }%| j dkrt!dg| j}&|j"|$|%| j| j#| jj$dt%|&g| jj$d}'n&|j"|| j| j ddt%|&g| jj$d }'t&|'D ]v\}(})|'|( }*| jj|*dd!}*|*||( d }+|+'dd ( }+||( |+d"},|)t*+|,d  |,  |d7 }qJ| -||||| j q|.  |/  | 0| dS )#z%
        Evaluate the model.
        Nz/log_dir should not be None when evaluating MBPP)Z	samplenumr,   P   z!MBPP PASS@80 needs n_sample >= 80r   _rank_bs
_shot_log_.jsonwprompt
testz<You are an expert Python programmer, and here is your task: z% Your code should pass these tests:

z
[BEGIN]r*   r   codetask_idTpt)paddingreturn_tensorsr   attention_maskz[DONE]F)r   rW   max_new_tokensr<   eos_token_id	do_samplestopping_criteriapad_token_idr)   )rX   r;   r<   r?   r[   r\   )skip_special_tokens)rS   
generation)1r:   rF   r	   r3   r8   lenprocess_indexnum_processesr7   evalnparray_splitranger   r@   pathjoinr>   r6   r9   opentimer   rO   r   encoder4   r5   r   rB   todevicer;   r   generater<   rY   r   	enumeratesplitstripwritejsondumpsflush	log_scoreclosewait_for_everyone_calculate_final_score)-r   gptacceleratordatasetZnpromptdp_rankdp_sizeZprompt_indices_splitZprompt_indicesindicesxjall_numprocessed_numZlog_filetmpfileZ	totoalnum
start_timeidxZprompt_listZprompt_lensZanswers_listZ	test_listZtaskidZtokenized_prompt_lensdatarO   Zprompt1testsZprompt_currZfpromptiZfinalpromptZcurr_seq_lenZ
encodelistZtokenized_promptZinputidsZattenion_maskstop_criteriadecoded	local_idxtext
predictionZsuffixpredictionresr   r   r   
eval_model@   s    

 




$$
zMBPP.eval_modelc                 C   s   t j d }t | | | }td| dt| d| d|dd|| | d | d	 dd
d|dd| dd ||krtdt | d	 dd
dd dS )z 
        Log the score.
        i   @zDP RANK:z process_num/all_num:r.   z avg_time_per_batch:z.2fz s still_need:r,   <   z mzmem:z.3fz GiB bs:T)rt   zEVAL DONE! Process time N)r    cudamax_memory_allocatedri   rE   int)r   r|   r   r   r   bsmemZavg_timer   r   r   ru      s    >zMBPP.log_scorec           	      C   s   |j rtj| jd| j d}t|d}t|jD ]V}tj| j| j d| d| j	 d| j
 d}|t|  d  t| q4|  d	}| j
}t|tj| jd
| j||d}td|d| j   t| dS )z,
        Calculate the final score.
        Zfinal_z.jsonlrN   rJ   rK   rL   rM   rP   
   zmbpp_test.jsonl)
input_fileZproblem_filetmp_dirtimeoutr9   zscore iszpass@%dN)is_local_main_processr@   rf   rg   r:   r>   rh   re   ra   r6   r9   rq   readrp   removerv   r   r3   rE   r7   )	r   rz   Zlogfilepathlogfiler   Z
tmplogfiler   Zrunlangr   r   r   r   rx      s    
. 
zMBPP._calculate_final_score)r%   r&   r'   r(   Nr   Fr)   r*   TNr+   r,   )
r   r   r   __doc__r   r    no_gradr   ru   rx   r   r   r   r   r$   "   s&                        

ir$   )ri   stringmultiprocessingr@   numpyrc   rr   rer    datetime
subprocesstorch.distributeddistributeddistZattrdictr   r   Zhuman_eval.evaluationr   transformersr   r   r   r   Zutils.datasetr	   Zutils.utilsr
   r   r$   r   r   r   r   <module>   s$   