import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from contextlib import suppress

import os

import sys

 

 

## Build the absolute path of the CSV file to create in the current working directory
filename = "test.csv"

# os.path.join avoids doubled separators when the working directory is "/"
path_file = os.path.join(os.getcwd(), filename)

# Bare expression: displays the path when run as a notebook cell
path_file

 

## 빈 파일 만들기

!time touch path_file

 

# 파일 전달하기

!time hdfs dfs -ut test.csv /user/kp19983

 

# Remove any previous copy of the table so the script can be re-run.
# NOTE(review): TypeError is swallowed presumably because the statement yields
# no result set for pd.read_sql to build a DataFrame from — confirm against the driver.
with suppress(TypeError):
    pd.read_sql(sql="""DROP TABLE IF EXISTS db.temp""", con=conn)

 

## Create the table: comma-delimited rows, skipping the first (header) line of each file
query="""

create table db.temp(salaried int,

                                    code_group VARCHAR(20),

                                    cnt int)

ROW FORMAT DELIMITED

FIELDS TERMINATED BY ','

TBLPROPERTIES("skip.header.line.count"="1");

"""

# Guard this DDL the same way the DROP TABLE and LOAD DATA statements in this
# script are guarded.
# NOTE(review): presumably the statement returns no result set, so pd.read_sql
# raises TypeError even though the statement itself ran — confirm against the driver.
with suppress(TypeError):
    pd.read_sql(query, conn)

 

## Load the data into the table
# The URI scheme needs the colon ("hdfs://"); without it the string is not an HDFS URI.
# NOTE(review): the file uploaded earlier in this script is test.csv under
# /user/kp19983, while this statement loads temp.csv from a different
# directory — verify the intended path.
with suppress(TypeError):
    pd.read_sql("""

LOAD DATA inpath 'hdfs://koreaserver/data/db/users/temp.csv' INTO TABLE db.temp;

""", conn)

 

## Check that the table was created correctly: read every row back
query="""

select *

from db.temp

"""

a = pd.read_sql(sql=query, con=conn)

# Preview the first five rows (displayed when run as a notebook cell)
a.head(5)

 

# (Blog page footer captured with the post, not part of the script: "반응형" [responsive] / "+ Recent posts")