Парсер rss новостей и статей

Тема в разделе "NetCat", создана пользователем emil116rus, 18 дек 2008.

Статус темы:
Закрыта.
  1. emil116rus

    emil116rus Постоялец

    Регистр.:
    29 мар 2008
    Сообщения:
    130
    Симпатии:
    15
    Люди добрые, помогите с парсером. Может, у кого-нибудь есть?
     
  2. irk

    irk Постоялец

    Регистр.:
    18 май 2009
    Сообщения:
    87
    Симпатии:
    1
    парсер RSS

    <?php

    /**
     * XML start-tag handler: pushes the element onto the current tag path.
     *
     * The path lives in the global $tag as a string of "^"-prefixed, lower-cased
     * element names (e.g. "^rss^channel^item"). When the element is the feed root
     * ("rss" or "rdf:rdf") it is also stored, with its "^" prefix, in the global
     * $rss so the other handlers can build absolute paths.
     *
     * @param mixed  $parser XML parser that invoked the handler (unused).
     * @param string $name   Element name as reported by the parser.
     * @param array  $attrs  Element attributes (unused).
     */
    function startelement($parser, $name, $attrs) {
        global $tag, $rss;

        // The parser upper-cases names when case folding is on (the default);
        // normalise so the comparisons work with either setting.
        $name = strtolower($name);

        if ($name === 'rss' || $name === 'rdf:rdf') {
            $rss = '^' . $name;
        }

        $tag .= '^' . $name;
    }

    /**
     * XML end-tag handler: pops the last element off the current tag path.
     *
     * When the closed element is an item, the global $itemcount is advanced and
     * an empty slot for the next item is created in the global $items, so that
     * character data can always be appended to $items[$itemcount].
     *
     * @param mixed  $parser XML parser that invoked the handler (unused).
     * @param string $name   Name of the element being closed.
     */
    function endelement($parser, $name) {
        global $tag, $itemcount, $items;

        // Case-insensitive: the parser may report "ITEM" or "item".
        if (strtolower($name) === 'item') {
            $itemcount++;
            if (!isset($items[$itemcount])) {
                $items[$itemcount] = array('title' => '', 'link' => '', 'desc' => '', 'pubdate' => '');
            }
        }

        // Drop the trailing "^name" segment; an empty path stays empty.
        $cut = strrpos((string) $tag, '^');
        $tag = ($cut === false) ? '' : substr($tag, 0, $cut);
    }

    /**
     * XML character-data handler: appends text to the field selected by the
     * current tag path.
     *
     * Reads the globals $tag (current path) and $rss (root segment) and appends
     * $data to one of: $chantitle / $chanlink / $chandesc (channel fields),
     * $items[$itemcount][title|link|desc|pubdate] (current item) or
     * $imgtitle / $imglink / $imgurl (feed image). Text at any other path is
     * ignored. The parser may deliver one text node in several chunks, which is
     * why every target is appended to rather than assigned.
     *
     * @param mixed  $parser XML parser that invoked the handler (unused).
     * @param string $data   Chunk of character data.
     */
    function characterdata($parser, $data) {
        global $tag, $chantitle, $chanlink, $chandesc, $rss, $imgtitle, $imglink, $imgurl;
        global $items, $itemcount;

        // Compare against '' explicitly: a plain truthiness test would drop a "0" chunk.
        if ((string) $data === '') {
            return;
        }

        // Lower-case both sides so the match does not depend on case folding.
        $path = strtolower((string) $tag);
        $root = strtolower((string) $rss);

        if ($path === $root . '^channel^title') {
            $chantitle .= $data;
        } elseif ($path === $root . '^channel^link') {
            $chanlink .= $data;
        } elseif ($path === $root . '^channel^description') {
            $chandesc .= $data;
        }

        // Under an <rss> root, items and the image sit inside <channel>;
        // under any other root (rdf:rdf) they are looked up directly below the root.
        $base = $root . ($root === '^rss' ? '^channel' : '');

        $itemfields = array(
            $base . '^item^title'       => 'title',
            $base . '^item^link'        => 'link',
            $base . '^item^description' => 'desc',
            $base . '^item^pubdate'     => 'pubdate',
        );

        if (isset($itemfields[$path])) {
            if (!isset($items[$itemcount])) {
                $items[$itemcount] = array('title' => '', 'link' => '', 'desc' => '', 'pubdate' => '');
            }
            $items[$itemcount][$itemfields[$path]] .= $data;
        } elseif ($path === $base . '^image^title') {
            $imgtitle .= $data;
        } elseif ($path === $base . '^image^link') {
            $imglink .= $data;
        } elseif ($path === $base . '^image^url') {
            $imgurl .= $data;
        }
    }

    /**
     * Downloads a feed, parses it and prints the result.
     *
     * Resets the global parse state ($tag, $rss, channel/image fields, $items,
     * $itemcount), reads the whole document from $url, runs it through an XML
     * parser wired to startelement / endelement / characterdata, and then calls
     * displaydata() on success or prints an error message on failure.
     *
     * @param string $url Location of the feed; anything fopen() can read.
     */
    function parserss($url) {
        global $tag, $chantitle, $chanlink, $chandesc, $rss, $items, $itemcount, $imgtitle, $imglink, $imgurl;

        $chantitle = '';
        $chanlink = '';
        $chandesc = '';
        $imgtitle = '';
        $imglink = '';
        $imgurl = '';
        $tag = '';
        $rss = '';

        $itemcount = 0;
        $items = array(0 => array('title' => '', 'link' => '', 'desc' => '', 'pubdate' => ''));

        // Fetch the whole document first. The warning from a failed fopen() is
        // suppressed on purpose: the failure is reported just below.
        $data = '';
        $fp = @fopen($url, 'r');
        if ($fp !== false) {
            while (true) {
                $chunk = fread($fp, 4096);
                // Stop on error or when nothing more arrives (EOF or timeout).
                if ($chunk === false || $chunk === '') {
                    break;
                }
                $data .= $chunk;
            }
            fclose($fp);
        }

        if ($data === '') {
            print("error while retriving feed $url");
            return;
        }

        $xml_parser = xml_parser_create();
        xml_set_element_handler($xml_parser, 'startelement', 'endelement');
        xml_set_character_data_handler($xml_parser, 'characterdata');

        // The document is complete, so mark it final: a truncated feed is then
        // reported as an error instead of being half-accepted.
        $xmlresult = xml_parse($xml_parser, $data, true);
        $xmlerror = xml_error_string(xml_get_error_code($xml_parser));
        $xmlcrtline = xml_get_current_line_number($xml_parser);

        // Parsers are resources only on old PHP; newer versions use objects
        // that are released automatically.
        if (is_resource($xml_parser)) {
            xml_parser_free($xml_parser);
        }

        if ($xmlresult) {
            displaydata();
        } else {
            print("error parsing this feed !<br />error: $xmlerror , at line: $xmlcrtline");
        }
    }

    /**
     * Prints the parsed feed as a complete HTML page.
     *
     * Reads the globals filled in by the parser handlers: the channel fields
     * ($chantitle, $chanlink, $chandesc), the image fields ($imgurl, $imgtitle)
     * and $items. Every value comes from a remote feed, so it is HTML-escaped
     * before being written; item descriptions are additionally stripped of tags
     * because they are printed as link text.
     */
    function displaydata() {
        global $chantitle, $chanlink, $chandesc, $items, $imgtitle, $imgurl;

        // Escape for both element content and quoted attribute values.
        $esc = function ($value) {
            return htmlspecialchars(trim((string) $value), ENT_QUOTES, 'UTF-8');
        };

        $title = $esc($chantitle);

        echo "<html><head><title>{$title}</title></head>\n";
        echo "<body>\n<div>\n";
        // Only emit the image when the feed actually supplied one.
        if (trim((string) $imgurl) !== '') {
            echo '<a href="' . $esc($chanlink) . '"><img src="' . $esc($imgurl)
                . '" alt="' . $esc($imgtitle) . '" border="0" /></a>' . "\n";
        }
        echo "<h1>{$title}</h1>\n";
        echo '<h3>' . $esc($chandesc) . "</h3>\n";
        echo "</div>\n<hr />\n";

        // The last entry of $items is the empty slot prepared for the next item,
        // so it is not printed.
        $last = is_array($items) ? count($items) - 1 : 0;
        for ($i = 0; $i < $last; $i++) {
            $item = $items[$i];
            echo '<h4>' . $esc($item['title']) . '</h4>';
            echo '<h5>' . $esc($item['pubdate']) . '</h5>';
            echo '<a href="' . $esc($item['link']) . '">' . $esc(strip_tags((string) $item['desc'])) . '</a>';
        }

        echo "\n</body></html>\n";
    }

    // Fetch, parse and print the feed at this address.
    $url = 'http://xmlhack.ru/index.rdf';
    parserss($url);
    ?>
     
  3. irk

    irk Постоялец

    Регистр.:
    18 май 2009
    Сообщения:
    87
    Симпатии:
    1

    <?php
    $itemNum=0;
    /**
     * Event-driven RSS reader.
     *
     * Constructing an instance opens the feed, parses it and leaves the result
     * in public properties: the channel_* fields for the channel itself and the
     * parallel lists $titles, $links, $descriptions, $pubDates and $authors,
     * which hold one entry per <item> in document order.
     *
     * Element names are compared in upper case, which relies on the XML
     * parser's case folding being left at its default (enabled).
     */
    class RSSParser {
        // Channel-level fields.
        public $channel_title = "";
        public $channel_website = "";
        public $channel_description = "";
        public $channel_pubDate = "";
        public $channel_lastUpdated = "";
        public $channel_copyright = "";
        // Ready-made <img> tag built from the channel's <image> element.
        public $channel_image = "";

        // Accumulators for the <item> / <image> currently being read.
        public $title = "";
        public $link = "";
        public $description = "";
        public $pubDate = "";
        public $author = "";
        public $url = "";
        public $width = "";
        public $height = "";

        // True while the parser is inside an <item> or <image> element.
        public $inside_tag = false;
        // Name of the element whose text is currently being collected.
        public $current_tag = "";
        // Parser handle; only set while the constructor is running.
        public $xml_parser = null;

        // Per-item results, index-aligned with each other.
        public $titles = array();
        public $links = array();
        public $descriptions = array();
        public $pubDates = array();
        public $authors = array();

        /**
         * Reads and parses the feed.
         *
         * Terminates the script with die() when the feed cannot be opened or
         * is not well-formed XML.
         *
         * @param string $file    Path or URL of the feed; anything fopen() can read.
         * @param string $encType Encoding name passed to xml_parser_create().
         */
        public function __construct($file, $encType) {
            $this->xml_parser = xml_parser_create($encType);
            // Bind the handlers as object callables rather than via xml_set_object().
            xml_set_element_handler(
                $this->xml_parser,
                array($this, "startElement"),
                array($this, "endElement")
            );
            xml_set_character_data_handler($this->xml_parser, array($this, "characterData"));

            $fp = @fopen("$file", "r") or die("$file could not be opened");

            $fed = false;
            while (!feof($fp)) {
                $data = fread($fp, 4096);
                // Stop on error or when nothing more arrives.
                if ($data === false || $data === '') {
                    break;
                }
                $fed = true;
                xml_parse($this->xml_parser, $data, false) or die("XML error");
            }
            fclose($fp);

            // Tell the parser the document is complete, so that a truncated
            // feed is reported. An entirely empty source yields no items
            // instead of an error.
            if ($fed) {
                xml_parse($this->xml_parser, '', true) or die("XML error");
            }

            // Parsers are resources only on old PHP; newer versions use objects
            // that are released when the last reference goes away.
            if (is_resource($this->xml_parser)) {
                xml_parser_free($this->xml_parser);
            }
            $this->xml_parser = null;
        }

        /**
         * Start-tag handler: remembers the element and, on <item> / <image>,
         * switches to item mode and clears the per-item accumulators.
         */
        public function startElement($parser, $tag, $attributes = '') {
            $this->current_tag = $tag;
            if ($tag == "ITEM" || $tag == "IMAGE") {
                $this->inside_tag = true;
                $this->description = "";
                $this->link = "";
                $this->title = "";
                $this->pubDate = "";
            }
        }

        /**
         * End-tag handler: on </item> stores the collected fields in the result
         * lists, on </image> builds $channel_image.
         */
        public function endElement($parser, $tag) {
            switch ($tag) {
                case "ITEM":
                    $this->titles[] = trim($this->title);
                    $this->links[] = trim($this->link);
                    $this->descriptions[] = trim($this->description);
                    $this->pubDates[] = trim($this->pubDate);
                    $this->authors[] = trim($this->author);
                    $this->author = "";
                    $this->inside_tag = false;
                    break;
                case "IMAGE":
                    $this->channel_image = "<img src=\"" . trim($this->url)
                        . "\" width=\"" . trim($this->width)
                        . "\" height=\"" . trim($this->height)
                        . "\" alt=\"" . trim($this->title)
                        . "\" border=\"0\" title=\"" . trim($this->title) . "\" />";
                    $this->title = "";
                    $this->inside_tag = false;
                    break;
                default:
                    break;
            }

            // No element is open for text collection any more. Without this,
            // whitespace between elements would be appended to whichever field
            // was opened last.
            $this->current_tag = "";
        }

        /**
         * Character-data handler: appends the text to the item/image
         * accumulator or to the channel field selected by the current element.
         */
        public function characterData($parser, $data) {
            if ($this->inside_tag) {
                switch ($this->current_tag) {
                    case "TITLE":
                        $this->title .= $data;
                        break;
                    case "DESCRIPTION":
                        $this->description .= $data;
                        break;
                    case "LINK":
                        $this->link .= $data;
                        break;
                    case "URL":
                        $this->url .= $data;
                        break;
                    case "WIDTH":
                        $this->width .= $data;
                        break;
                    case "HEIGHT":
                        $this->height .= $data;
                        break;
                    case "PUBDATE":
                        $this->pubDate .= $data;
                        break;
                    case "AUTHOR":
                        $this->author .= $data;
                        break;
                    default:
                        break;
                }
            } else {
                switch ($this->current_tag) {
                    case "DESCRIPTION":
                        $this->channel_description .= $data;
                        break;
                    case "TITLE":
                        $this->channel_title .= $data;
                        break;
                    case "LINK":
                        $this->channel_website .= $data;
                        break;
                    case "COPYRIGHT":
                        $this->channel_copyright .= $data;
                        break;
                    case "PUBDATE":
                        $this->channel_pubDate .= $data;
                        break;
                    case "LASTBUILDDATE":
                        $this->channel_lastUpdated .= $data;
                        break;
                    default:
                        break;
                }
            }
        }
    }

    // Download and parse the feed; the table below reads its items from $forex1.
    $forex1 = new RSSParser('http://www.newsland.ru/rss/getnews/ord/1/cat/0', 'utf-8');
    ?>

    <table width="100%" border="0" class="tab">
    <?php
    // Maximum number of items to print; 0 means "all of them".
    $forex1_RSSmax=50;
    // An empty or failed feed may leave the list unset, so do not count() it blindly.
    $forex1_total = isset($forex1->titles) ? count($forex1->titles) : 0;
    if($forex1_RSSmax==0 || $forex1_RSSmax>$forex1_total)$forex1_RSSmax=$forex1_total;
    for($itemNum=0;$itemNum<$forex1_RSSmax;$itemNum++){
    // Escape while the text is still UTF-8, then convert to the page encoding.
    $title=htmlspecialchars($forex1->titles[$itemNum], ENT_QUOTES, 'UTF-8');
    // NOTE(review): the description is printed as-is so that markup supplied by the
    // feed is rendered; only do this for feeds you trust.
    $descr=$forex1->descriptions[$itemNum];
    $link=htmlspecialchars($forex1->links[$itemNum], ENT_QUOTES, 'UTF-8');
    ?><tr>
    <td align="left" bgcolor="" ><b style="font-size:12px"><?php
    echo iconv("UTF-8","windows-1251", $title );
    ?></b></td>
    </tr>
    <tr>
    <td align="left" ><?php
    echo iconv("UTF-8","windows-1251", $descr );
    ?>
    <div class="file"><noindex><a href="<?php echo $link; ?>" target="_blank" rel="nofollow">Читать целиком</a><br><br></noindex></div></td>
    </tr>
    <?php } ?>
    </table>
     
  4. shunia

    shunia Постоялец

    Регистр.:
    23 июн 2007
    Сообщения:
    130
    Симпатии:
    8
    http://netcat.e812.ru/class/import/
     
    mafru нравится это.
Статус темы:
Закрыта.