Thanks to the creator of Pandas, data extraction from excel and delimited text file is made easy.
Below is my modified function that suits to be used to extract data from delimited text files and excel files, then they are converted to html.
from pandas import read_csv, read_excel from os import chdir def get_data_frame(dir, filename, sep=None, is_excel=False, skip_rows=[]): chdir(dir) # default separator is a comma, hence I did not specify separator. if is_excel: if skip_rows: return read_excel(filename, skiprows=skiprows) else: return read_excel(filename) if sep: return read_csv(filename, sep=sep) else: return read_csv(filename) if __name__ == '__main__': # remove the first 3 rows [0:3] as those are not cell data in xls. # to use read_excel the xlrd module is required. df = get_data_frame("D:\\temp", "Income-By-State-1984.xls", is_excel=True, skip_rows=[0, 1, 2]) df.to_html("start_from_row4.html", index=0) df1 = get_data_frame("D:\\temp", "Comma-Separated.txt") df1.to_html("comma_sep.html") df2 = get_data_frame("D:\\temp", "Space-Separated.txt", sep='\s+') # do not display the header of the columns. df2.to_html("space_sep.html")
HTML