# Split the beginning of a large CSV file into numbered pieces
# (str_0.csv, str_1.csv, ...) written to the current working directory.
filename = 'C:/Users/haesaekju/OneDrive/Documents/PyData/P00000001-ALL.csv'
chunksize = 2 * 10 ** 5  # number of rows read (and written) per piece
last_chunk_index = 10  # pieces 0..10 are written (11 files), then we stop

# Names written as the header row of every output file. pandas treats a list
# passed as `header` as aliases for the chunk's columns, so its length must
# match the number of columns in the chunk.
output_header = [
    'cmte_id', 'cand_id', 'cand_nm', 'contbr_nm', 'contbr_city',
    'contbr_st', 'contbr_zip', 'contbr_employer', 'contbr_occupation',
    'contb_receipt_amt', 'contb_receipt_dt', 'receipt_desc', 'memo_cd',
    'memo_text', 'form_tp', 'file_num',
]

# The loop stops before the input is exhausted, so the chunked reader is used
# as a context manager (pandas >= 1.2) to make sure the underlying file handle
# is closed when we break out early.
with pd.read_csv(filename, chunksize=chunksize) as reader:
    for cnt, chunk in enumerate(reader):
        # Any per-chunk preprocessing would go here, before the chunk is saved.
        # `index` is left at its default, so the row index is written as well.
        chunk.to_csv(f'str_{cnt}.csv', header=output_header)
        if cnt >= last_chunk_index:
            break

 

 

반응형

+ Recent posts