Convert HTML table to Excel XLS

I recently needed to update a page that dumped a HTML table into Excel – and found it dumped everything, including hyperlinks, buttons, images, etc. This didn’t sit well with Excel for Mac, and probably not a good idea.

I thought I would re-write it, but in a way that was re-usable (as this code was situated on every datagrid in the application)

So, using the PEAR extension Spreadsheet_Excel_Writer and a simple HTML DOM class, I came up with this. Its by no means finished, but as it stands, it works pretty well for what I need. Feel free to rob it and use the code.

The call is from a form which uses jQuery to copy the table as HTML into an input and then uses that in the output script ($_REQUEST[‘gridcontent’])

When its all complete, I may put it on an open source repo, such as sourceforge.

FYI, there is a section in here that takes cell text with a
in it as a split cell. So if your TD has a BR in it, it will make all the other rows merged so that the data looks nice. This is by design.

$fail = false;

@(include('Spreadsheet/Excel/Writer.php')) OR $fail = true;

if ($fail) {
    //not got the nice PEAR installed, so do it the old way
    header("Content-type: application/vnd.ms-excel; name='excel'");
    header("Content-Disposition: filename=export.xls");
    // Fix for crappy IE bug in download.
    header("Pragma: ");
    header("Cache-Control: ");
    echo $_REQUEST['datatodisplay'];
    exit();
}

//if we get here then we've got the spreadsheet pear addon
//so do it the nice way
function remove_tags($string) {
    $temp = strip_tags($string);
    return $temp;
}

require_once '../ext/simplehtmldom/simple_html_dom.php';

//this is for testing
header("Content-type: text/plain");

//start a new spreadsheet thingy
$spread             = new Spreadsheet_Excel_Writer();

//get the table of data
$table              = $_REQUEST['datatodisplay'];

$dom            = new simple_html_dom();
$table_obj      = str_get_html($table);
$header_array   = array();

//see if its a proper table and has a thead section
if (strpos($table, 'find('thead') as $head) {

        //and the rows (again only supports one)
        foreach ($head->find('tr') as $head_row) {

            //some pages might use proper th's
            foreach ($head_row->find('th') as $head_cell) {
                $header_array[] = $head_cell->innertext;
            }

            //and some old dirty might use td's
            foreach ($head_row->find('td') as $head_cell) {
                $header_array[] = $head_cell->innertext;
            }
        }
    }

} else {

    //if we're not using thead - then grab the first child
    $header         = $table_obj->firstChild();

    //do all the rows
    foreach ($table_obj->find('tr') as $table_row) {

        //and the table headers (this should only be looped once)
        foreach ($table_row->find('th') as $table_cell) {

            //save the header name
            $header_array[] = $table_cell->innertext;
        }
    }
}

$table_data     = array();
$column_cells   = array();

//loop through each row
foreach ($table_obj->find('tr') as $table_row) {
    $temp_data = array();

    //and each cell in that row
    foreach ($table_row->find('td') as $table_cell) {

        //if the cell has a random input box
        if (strpos($table_cell, 'find('input') as $input) {

                //get the value of the input
                $temp_data[] = array(0 => $input->getAttribute('value'));
            }

        } else {

            //are there any BRs?
            if (stripos($table_cell->innertext, '')) {

                //perhaps we should create an array of these
                $string         = str_ireplace('', '', $table_cell->innertext);
                $temp           = explode("", $string);
                $return_array   = array();
                foreach ($temp as $item) {
                    $text = trim(strip_tags($item));
                    if ("" != $text) {
                        $return_array[] = $text;
                    }
                }

                //so now we have a nicely trimmed array of values.
                $temp_data[]  = $return_array;
            } else {                

                //othersise just get the innertext
                $temp_data[] = array(0 => trim(str_ireplace(" ", " ", strip_tags($table_cell->innertext))));
            }
        }
    }

    //append the row
    $table_data[] = $temp_data;
}

//create a worksheet
$sheet      =& $spread->addWorksheet('icfm_export');

$sheet->writeRow(0, 0, $header_array);

$row_number = 0;
$new_line   = 0;

$format_justify_top =& $spread->addFormat();
$format_justify_top->setVAlign('top');

for ($row = 1; $row < count($table_data); $row++) {
    //see if we have any multiples
    foreach ($table_data[$row] as $the_row) {
        if (count($the_row) > $biggest) {
            $biggest = count($the_row);
        }
    }

    $row_number ++;
    foreach ($table_data[$row] as $col => $the_row) {
        $counter = count($the_row);

        for ($i = 0; $i < $biggest; $i++) {
             $text = '';
             if (isset($the_row[$i])) {
                 $text = $the_row[$i];
             }
             $sheet->writeString($row_number + $i, $col, $text, $format_justify_top);

            if ('' == $text) {
                $sheet->mergeCells($row_number, $col, $row_number + $i, $col);
            }
        }
    }
    $row_number += ($biggest - 1);
}        
//we have the header and the data now. awesome
$spread->send('export.xls');

$spread->close();

Leave a Reply

Your email address will not be published.

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <s> <strike> <strong>

This site uses Akismet to reduce spam. Learn how your comment data is processed.