This could be used to extract all links (<a> elements) from an HTML file.
/**
 * Example element handler: prints the content and the attributes of one
 * parsed element.
 *
 * Both the content and every attribute name/value are passed through
 * htmlspecialchars() before being echoed, so markup taken from the parsed
 * file is never emitted verbatim.
 *
 * @param array|mixed $attribs Attribute name => value map. Any non-array
 *                             value (e.g. an empty string or null) is
 *                             treated as "no attributes".
 * @param string      $content Text found between the opening and closing tag.
 *
 * @return void
 */
function html_element_handler($attribs, $content)
{
    echo "Content:\n" . htmlspecialchars((string) $content) . "\n";
    echo "Attributes:\n";

    // Callers may pass a non-array when an element has no attributes;
    // iterating such a value would raise a warning, so skip it.
    if (!is_array($attribs)) {
        return;
    }

    // foreach replaces each(), which was removed in PHP 8.0.
    foreach ($attribs as $key => $value) {
        echo htmlspecialchars((string) $key) . " = " . htmlspecialchars((string) $value) . "\n";
    }
}
/**
 * Opens $html_file, scans it character by character for <$tag ...> elements
 * and reports each one to the callback $element_handler.
 *
 * $element_handler is a custom function that acts upon the attributes and
 * the content of $tag. It is called every time a previously opened $tag is
 * closed, as
 *
 *     $element_handler($attribs, $content)
 *
 * where
 *   - $attribs is an array mapping attribute names to their values
 *     (empty array if the tag has no attributes; attributes without a
 *     value map to an empty string), and
 *   - $content is the text between the opening and the closing tag, with
 *     the closing tag and any nested <$tag ...> openers removed.
 *
 * Tag names are matched case-insensitively and only as whole names, so
 * searching for "test" does not match <testing>. A closing tag that has no
 * matching opening tag is ignored.
 *
 * If the file cannot be opened the script is terminated via die().
 *
 * @param string   $html_file       Path of the HTML file to parse.
 * @param callable $element_handler Callback receiving ($attribs, $content).
 * @param string   $tag             Element name to look for, without brackets.
 *
 * @return void
 */
function html_parse($html_file, $element_handler, $tag)
{
    $fd = fopen($html_file, "r");
    if ($fd === false) {
        die("Error: Unable to open file $html_file");
    }

    // Build the patterns once; preg_quote() keeps regex metacharacters in
    // $tag from being interpreted.
    $quoted_tag = preg_quote($tag, '/');
    // The lookahead makes sure the tag name ends here ("<test" vs "<testing").
    $open_pattern = '/^<' . $quoted_tag . '(?=[\s\/>])/i';
    $close_pattern = '/^<\/' . $quoted_tag . '\s*>$/i';
    $nested_open_pattern = '/<' . $quoted_tag . '(?=[\s\/>])[^>]*>/i';
    $any_close_pattern = '/<\/' . $quoted_tag . '\s*>/i';
    // name, optionally followed by = and a double-quoted, single-quoted or
    // unquoted value. The branch reset (?|...) puts the value in group 2
    // whichever quoting style matched.
    $attrib_pattern = '/([^\s"\'=<>\/]+)(?:\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+)))?/';

    $inside_tag = false; // currently between "<" and ">"
    $element = "";       // raw text of the tag being read, e.g. '<a href="x">'
    $open_tag = "";      // raw opening tag of the $tag element being collected
    $content = "";       // everything read since that opening tag

    try {
        // fgetc() returns false at EOF, so no extra iteration is processed.
        while (($char = fgetc($fd)) !== false) {
            if ($open_tag !== "") {
                $content .= $char;
            }

            if ($char == "<") {
                // A new "<" always starts a fresh candidate tag, so a stray
                // "<" in text cannot glue itself onto the next real tag.
                $inside_tag = true;
                $element = "";
            }

            // We're inside a tag, so add $char to $element (for testing
            // later whether this is $tag).
            if ($inside_tag) {
                $element .= $char;
            }

            if ($char != ">") {
                continue;
            }

            $inside_tag = false;

            if (preg_match($open_pattern, $element)) {
                $open_tag = $element;
            } elseif ($open_tag !== "" && preg_match($close_pattern, $element)) {
                // Cut off the leading "<tag" and the trailing ">" so only
                // the attribute list remains.
                $attrib_text = (string) substr($open_tag, strlen($tag) + 1, -1);

                $attribs = [];
                if (preg_match_all($attrib_pattern, $attrib_text, $matches, PREG_SET_ORDER)) {
                    foreach ($matches as $match) {
                        $attribs[trim($match[1])] = isset($match[2]) ? trim($match[2]) : "";
                    }
                }

                // $content was collected up to and including the closing
                // tag; remove it together with any nested <$tag ...> openers.
                $content = preg_replace($nested_open_pattern, "", $content);
                $content = preg_replace($any_close_pattern, "", $content);

                $element_handler($attribs, $content);

                $content = "";
                $open_tag = "";
            }

            $element = "";
        }
    } finally {
        // Release the handle even if the handler throws.
        fclose($fd);
    }
}
// Example invocation: parse anfrage.html and hand every "test" element
// to html_element_handler.
html_parse("anfrage.html", "html_element_handler", "test");
?>