Source code for prep.excel
"""Module for reading excel files.
"""
# New format 20181007
import pandas as pd
import pathlib
from calendar import monthrange
# STAGE SCANNING
def _file_year(file, template='phderi'):
file = pathlib.Path(file)
if template == 'phderi':
return int(file.stem.split()[0])
# def _file_name(file, template='phderi', ixn=-2):
# file = pathlib.Path(file)
# if template == 'phderi':
# return ''.join(file.stem.split()[ixn:])
[docs]def _cell_index(dataframe, template='phderi'):
"""Return cell index (column, row) of first value on pivot.
Parameters
----------
dataframe : DataFrame
Raw dataframe imported from excel
template : str, optional
Template, by default 'phderi'
Returns
-------
list
Return [column index, row index]
Raises
------
Exception
Not match with template.
"""
template_check = {'phderi': ['Jan', 'Feb'],
'pdderi': ['JAN', 'FEB']}
target, check = template_check[template]
cell_index = []
for column in dataframe:
target_status = dataframe[column].astype(str).str.contains(
'^' + target + '$'
).sum()
if target_status:
cell_index.append(column)
break
column = cell_index[0]
row_target = dataframe[column] == target
cell_index.append(dataframe[column]
[row_target].index.values.astype(int)[0])
column, row = cell_index[0], cell_index[1]
if dataframe.iloc[row, column + 1] == check:
return cell_index
else:
raise Exception(
'Template tidak sesuai dengan {template}'.format(
template=template)
)
[docs]def _file_single_pivot(file, template='phderi'):
"""Return pivot table inside file
Parameters
----------
file : str
File path
template : str, optional
Template, by default 'phderi'
Returns
-------
Dataframe
Pivot table
"""
file = pathlib.Path(file)
if template == 'phderi' or template == 'pdderi':
dataframe = pd.read_excel(file, sheet_name=0, header=None)
cell_index = _cell_index(dataframe, template)
column, row = cell_index[0], cell_index[1]
pivot = dataframe.iloc[row + 1:row + 32, column:column + 12]
return pivot
[docs]def _dataframe_year(year):
"""Return empty dataframe with date index
Parameters
----------
year : int
Year
Returns
-------
Dataframe
Empty dataframe with date index
"""
start, end = map(
pd.Timestamp, '{year}0101 {year}1231'.format(year=year).split())
return pd.DataFrame(index=pd.date_range(start, end))
[docs]def _dataframe_data(pivot, year):
"""Transform pivot table to list
Parameters
----------
pivot : DataFrame
Pivot table
year : int
Year
Returns
-------
list
Return list of data
"""
month = 1
data = []
for column in pivot:
days = monthrange(year, month)[1]
end = 31 if (days == 31) else (days - 31)
data += pivot[column][:end].tolist()
month += 1
return data
[docs]def _dataframe_table(pivot, year, name='ch'):
"""Transform pivot table to single column dataframe.
Parameters
----------
pivot : DataFrame
Pivot table
year : int
Year
name : str, optional
Column name, by default 'ch'
Returns
-------
DataFrame
Dataframe
"""
table = _dataframe_year(year)
data = _dataframe_data(pivot, year)
table[name] = data
return table
# @todo reshape pivot table
# @body cari tahu fungsi untuk mengubah pivot table, jadi fungsi diatas tidak
# @body diperlukan.