python programming
[pandas] csv 파일 로드 및 encode / decode
HR대장
2022. 7. 27. 13:37
728x90
pandas로 CSV 파일을 로드한 뒤, 한글이 깨져 있는 특정 열을 iso-8859-1로 인코딩하고 cp949로 다시 디코딩하여 복원하고, 복원한 열을 나머지 열과 합쳐 하나의 DataFrame으로 만들어 CSV로 저장하는 작업을 해보았습니다.
# This is a sample Python script.
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
import base64
import csv
import pandas as pd
def print_hi(name: str) -> None:
    """Print the greeting ``Hi, <name>`` to standard output.

    Args:
        name: Text inserted after ``Hi, `` in the greeting.
    """
    print(f'Hi, {name}')
# Press the green button in the gutter to run the script.
# Input and output locations. This script has also been pointed at the
# middle_shape_2022, elementary_shape_2022 and highschool_nostandard_shape_2022
# files in the same directory (output name = input name + "_final").
# NOTE(review): an earlier column list used with this script included SGG_CD
# and had no HAKGUDO_GB, so OUTPUT_COLUMNS may need adjusting per file - confirm.
SOURCE_CSV = "c:/upload/highschool_standard_shape_2022.csv"
OUTPUT_CSV = 'c:/upload/highschool_standard_shape_2022_final.csv'

# Columns whose text has to be re-decoded before export.
MOJIBAKE_COLUMNS = ('HAKGUDO_NM', 'EDU_UP_NM', 'EDU_NM')

# Columns written to the output file, in output order.
OUTPUT_COLUMNS = [
    'the_geom',
    'shp_srs',
    'OBJECTID',
    'HAKGUDO_ID',
    'HAKGUDO_NM',
    'HAKGUDO_GB',
    'SD_CD',
    'EDU_UP_CD',
    'EDU_UP_NM',
    'EDU_CD',
    'EDU_NM',
    'CRE_DT',
    'UPD_DT',
    'BASE_DT',
]


def fix_mojibake(value: object) -> object:
    """Re-decode one cell by encoding it as ISO-8859-1 and decoding as CP949.

    Args:
        value: A single cell value read from the CSV.

    Returns:
        The re-decoded string. Missing values (NaN/None) are returned
        unchanged so they stay missing instead of becoming the text "nan".
        If the text cannot be encoded as ISO-8859-1 or the resulting bytes
        are not valid CP949, the text is returned as-is (as ``str``).
    """
    if pd.isna(value):
        return value
    text = str(value)
    try:
        return text.encode('iso-8859-1').decode('cp949')
    except UnicodeError:
        # Not round-trippable, so this cell is not in the garbled form the
        # repair targets; keep it rather than abort the whole export.
        return text


def repair_columns(data: pd.DataFrame, columns=MOJIBAKE_COLUMNS) -> pd.DataFrame:
    """Return a copy of *data* with ``fix_mojibake`` applied to *columns*.

    Args:
        data: Frame as loaded from the CSV; it is not modified.
        columns: Names of the columns to re-decode.

    Returns:
        A new DataFrame with the same index and column order as *data*.

    Raises:
        KeyError: If a name in *columns* is not a column of *data*.
    """
    repaired = data.copy()
    for column in columns:
        # Assigning back by column name keeps the original index, so no
        # positional re-joining of separately built frames is needed.
        repaired[column] = data[column].map(fix_mojibake)
    return repaired


if __name__ == '__main__':
    data = pd.read_csv(SOURCE_CSV, sep=',')
    repaired = repair_columns(data)

    # Echo every repaired value, one column after another, for a visual check.
    for column in MOJIBAKE_COLUMNS:
        for text in repaired[column]:
            print(text)

    final_df = repaired[OUTPUT_COLUMNS]
    final_df.to_csv(OUTPUT_CSV, sep=',', index=False)
# See PyCharm help at https://www.jetbrains.com/help/pycharm/
728x90