/**
 * Performs one HTTP request through cURL and returns the raw response.
 *
 * CURLOPT_HEADER is enabled, so the returned string contains the response
 * headers followed by the body. Redirects are followed. Cookies are read from
 * and written to the file "cook" in the current working directory.
 *
 * @param string $url        Target URL; surrounding whitespace is trimmed.
 * @param mixed  $params     For POST: the request body (string or array, as
 *                           accepted by CURLOPT_POSTFIELDS). For GET: a suffix
 *                           appended verbatim to the URL (include the leading
 *                           "?" yourself).
 * @param string $cookie     Value for the Cookie request header; "" sends none.
 * @param mixed  $ch         Existing cURL handle to reuse, or 0 to create one.
 *                           On PHP < 8 the handle is closed before returning.
 * @param string $user       User name for HTTP authentication; "" disables it.
 * @param string $pass       Password used together with $user.
 * @param mixed  $header     Extra request header lines: an array of lines, a
 *                           single line as a string, or "" for none. The
 *                           built-in default headers are appended after them.
 * @param string $method     'GET' sends a GET request; any other value
 *                           (including the default '') sends a POST.
 * @param string $user_agent Value for the User-Agent request header.
 * @param bool   $debug      When true (the default) diagnostic HTML is printed
 *                           and cURL verbose mode is enabled.
 *
 * @return mixed The response (headers + body) as a string, or false on failure.
 */
function http_transaction( $url, $params="", $cookie="", $ch=0, $user="", $pass="", $header="", $method='', $user_agent= "Mozilla/4.0 (compatible; MSIE 6.0; Windows 98) Explorer/7.0", $debug=true )
{
    // Normalise $header to an array first: appending with [] to the default
    // "" is a fatal error on PHP >= 7.1.
    if ( is_array( $header ) ) {
        $headers = $header;
    } elseif ( $header !== "" && $header !== null ) {
        $headers = array( (string) $header );
    } else {
        $headers = array();
    }

    $headers[] = "Accept: text/html;q=0.9, text/plain;q=0.8, image/png, */*;q=0.5";
    $headers[] = "Accept-Charset: windows-1251, utf-8, utf-16;q=0.6, *;q=0.1";
    $headers[] = "Accept-Encoding: identity";
    $headers[] = "Connection: close";
    $headers[] = "Cache-Control: no-store, no-cache, must-revalidate";
    $headers[] = "Keep-Alive: 300";
    $headers[] = "Expires: Thu, 01 Jan 1970 00:00:01 GMT";
    $headers[] = "Accept-Language: ru,en-us;q=0.7,en;q=0.3";

    // The final URL must be known before CURLOPT_URL is set, otherwise the
    // GET parameters never reach the request.
    $url = trim( $url );
    if ( $method == 'GET' ) {
        $url = $url . $params;
    } else {
        $method = "POST";
    }

    // "!$ch" works for 0, resources (PHP < 8) and CurlHandle objects (PHP 8+).
    if ( !$ch ) {
        $ch = curl_init();
    }
    if ( !$ch ) {
        if ( $debug ) print "Error: curl_init() failed<br>\n";
        return false;
    }

    if ( $debug ) {
        // A CurlHandle object cannot be interpolated into a string.
        $handle_label = is_object( $ch ) ? get_class( $ch ) : (string) $ch;
        print "CH: $handle_label<br>\n";
    }

    curl_setopt( $ch, CURLOPT_URL, $url );
    curl_setopt( $ch, CURLOPT_TIMEOUT, 30 );
    curl_setopt( $ch, CURLOPT_LOW_SPEED_LIMIT, 0 );
    curl_setopt( $ch, CURLOPT_LOW_SPEED_TIME, 100000 );
    curl_setopt( $ch, CURLOPT_VERBOSE, $debug ? 1 : 0 );
    curl_setopt( $ch, CURLOPT_HEADER, 1 );
    curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 );
    curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers );

    if ( $cookie != "" ) {
        curl_setopt( $ch, CURLOPT_COOKIE, $cookie );
        if ( $debug ) print "COOKIE: $cookie<br>\n";
    }

    if ( $method == 'GET' ) {
        // Do not touch CURLOPT_POSTFIELDS here: setting it, even to "",
        // switches the request back to POST.
        curl_setopt( $ch, CURLOPT_HTTPGET, 1 );
    } else {
        curl_setopt( $ch, CURLOPT_POST, 1 );
        curl_setopt( $ch, CURLOPT_POSTFIELDS, $params );
    }

    if ( $debug ) {
        print "URL: $url<br>\n";
        // print_r(..., true) so array parameters are rendered, not "Array".
        print "PARAM: " . ( is_array( $params ) ? print_r( $params, true ) : $params ) . "<br>\n";
        print "METHOD: $method<br>\n";
        print "USERAGENT: $user_agent<br>\n";
    }

    curl_setopt( $ch, CURLOPT_SSL_VERIFYHOST, 2 );
    // NOTE(review): peer verification is disabled, which permits
    // man-in-the-middle attacks on https URLs. Kept as-is because enabling it
    // requires a usable CA bundle on the host; enable it once that is confirmed.
    curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, FALSE );

    curl_setopt( $ch, CURLOPT_USERAGENT, $user_agent );
    curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
    curl_setopt( $ch, CURLOPT_NOPROGRESS, 1 );

    // Byte ranges are zero-based; starting at 1 would drop the first byte of
    // the response on servers that honour the Range header.
    curl_setopt( $ch, CURLOPT_RANGE, "0-100000" );

    // Only register the upload callback when such a function actually exists.
    if ( is_callable( "read_callback" ) ) {
        curl_setopt( $ch, CURLOPT_READFUNCTION, "read_callback" );
    }

    curl_setopt( $ch, CURLOPT_COOKIEJAR, "cook" );
    curl_setopt( $ch, CURLOPT_COOKIEFILE, "cook" );

    if ( $user != "" ) {
        curl_setopt( $ch, CURLOPT_USERPWD, "$user:$pass" );
    }

    if ( $debug ) {
        // print_r() needs the "return" flag to be usable inside a concatenation.
        print "<pre>Header: " . print_r( $headers, true ) . "</pre><br>\n";
    }

    flush();
    $result = curl_exec( $ch );

    if ( $debug ) {
        print "Error: ( " . curl_errno( $ch ) . " )" . curl_error( $ch ) . "<br>\n";
    }

    // Handles are resources only on PHP < 8; CurlHandle objects are released
    // automatically and need no explicit close.
    if ( is_resource( $ch ) ) {
        curl_close( $ch );
    }

    return $result;
}
// Reads an RSS feed, prints title / description / link for a range of items,
// then fetches a page with http_transaction() and prints the fragment found
// between two HTML markers, converted from windows-1251 to utf-8.
$url = 'http://news.tut.by/rss/health.rss'; // RSS feed address

// Read the whole feed into a single string. The warning is suppressed because
// a failed download is reported by the else branch below.
$rss = @file_get_contents($url);

if ($rss) {
    // Each pattern captures the element together with its tags; the substr()
    // offsets further down strip exactly those tags.
    preg_match_all("/title>[^>]+>/", $rss, $title);                            // titles
    preg_match_all("/<description>[^<]+<\/description>/", $rss, $description); // descriptions
    preg_match_all('/<link>[^<]+<\/link>/', $rss, $link);                      // links

    $count = count($title[0]) - 1; // upper bound for the loop

    $body_start_marker = '<div id="body80">';
    $body_end_marker   = '<div class="authorAddon">';

    // Titles and links are read at index $i + 1, descriptions at index $i.
    for ($i = 6; $i < $count - 1; $i++) {
        // Skip positions where one of the three lists has no entry.
        if (!isset($title[0][$i + 1], $description[0][$i], $link[0][$i + 1])) {
            continue;
        }

        $item_link = substr($link[0][$i + 1], 6, -7);               // strip "<link>" / "</link>"

        echo '<h1>' . substr($title[0][$i + 1], 6, -8) . '</h1>';   // strip "title>" / "</title>"
        echo substr($description[0][$i], 13, -14) . "<BR/>";        // strip "<description>" / "</description>"
        echo $item_link . "<BR/>";

        // NOTE(review): a fixed placeholder URL is fetched on every pass
        // instead of the item's own link ($item_link). Presumably the article
        // page is the intended target; confirm before switching, and convert
        // the URL's encoding if the server requires it.
        $new = http_transaction('http://www.news.tut.by');

        // NOTE(review): dumps the complete raw response; looks like leftover
        // debugging output.
        echo $new;

        // Cut out the text between the two markers. strpos() returns false
        // when a marker is missing, which must not be used as an offset.
        $fragment = '';
        if (is_string($new)) {
            $start = strpos($new, $body_start_marker);
            if ($start !== false) {
                $fragment = (string) substr($new, $start + strlen($body_start_marker));
                $end = strpos($fragment, $body_end_marker);
                if ($end !== false) {
                    $fragment = (string) substr($fragment, 0, $end);
                }
            }
        }

        $nulled = iconv("windows-1251", "utf-8", $fragment);
        echo $nulled . "</table>";
    }
} else {
    // file_get_contents() returned false (or an empty feed).
    echo '<font color="red">Ошибка парсинга '.$url.'</font>';
}