[python]Read from delimited files and excel and convert to html with Pandas

Thanks to the creator of Pandas, data extraction from excel and delimited text file is made easy.

Below is my modified function that suits to be used to extract data from delimited text files and excel files, then they are converted to html.

from pandas import read_csv, read_excel
from os import chdir


def get_data_frame(dir, filename, sep=None, is_excel=False, skip_rows=[]):
    chdir(dir)
    # default separator is a comma, hence I did not specify separator.
    if is_excel:
        if skip_rows:
            return read_excel(filename, skiprows=skiprows)
        else:
            return read_excel(filename)
    if sep:
        return read_csv(filename, sep=sep)
    else:
        return read_csv(filename)


if __name__ == '__main__':
    # remove the first 3 rows [0:3] as those are not cell data in xls.
    # to use read_excel the xlrd module is required.
    df = get_data_frame("D:\\temp", "Income-By-State-1984.xls", is_excel=True, skip_rows=[0, 1, 2])
    df.to_html("start_from_row4.html", index=0)

    df1 = get_data_frame("D:\\temp", "Comma-Separated.txt")
    df1.to_html("comma_sep.html")

    df2 = get_data_frame("D:\\temp", "Space-Separated.txt", sep='\s+')
    # do not display the header of the columns.
    df2.to_html("space_sep.html")

HTML
excel1
from delimiters.PNG

Advertisement

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s