U
    _,Be                     @   s*   d dl Z d dlZd dlZG dd dZdS )    Nc                   @   s.   e Zd ZdddZdd Zdd Zdd	 Zd
S )MBPPDataset   c           	      C   s,  || _ ttj|d | _| | j| _g | _	t
ddD ]}| j| d }d| j| d }| j| d dd	d
d}d| d| d| d}t| j	dkr| j	| q<| j	| j	d |  q<g | _t
ddD ]$}t
|D ]}| j| j|  qqtjd td| dt| j  dS )u0   
        root: 数据文件的根目录
        z
mbpp.jsonlr      prompt
testcode 	z    z<You are an expert Python programmer, and here is your task: z% Your code should pass these tests:

z	
[BEGIN]
z
[DONE]
r   
   i  i  zRead MBPP from z, number of samples N)rootopenospathjoin	readlinesdataget_qa_only_dataZ
clean_datar   rangereplacelenappendtestdatanprandomseedprint)	selfr   	samplenumir   testsr   prompt1j r%   e/weka-jd/prod/containers/zhuqihao/dev-cpu/upload_code/deepseek-coder/Evaluation/MBPP/utils/dataset.py__init__   s$    zMBPPDataset.__init__c                 C   sL   g }|D ]>}t |}|d }|d }|d }|||||d d q|S )Ntext	test_listr   task_id)r   r   r   r*   )jsonloadsr   )r   Z	data_jsonansliner   suffixr   r%   r%   r&   r       s    
zMBPPDataset.get_qa_only_datac                 C   s
   t | jS N)r   r   )r   r%   r%   r&   __len__*   s    zMBPPDataset.__len__c                 C   s   | j | }|S r0   )r   )r   indexsampler%   r%   r&   __getitem__-   s    
zMBPPDataset.__getitem__N)r   )__name__
__module____qualname__r'   r   r1   r4   r%   r%   r%   r&   r      s   

r   )r   numpyr   r+   r   r%   r%   r%   r&   <module>   s   