Reputation: 2005
My function is supposed to get the destination URL of $url:
/**
 * Request $url with cURL, letting cURL follow HTTP redirects, and return
 * the transfer information reported by curl_getinfo().
 *
 * The response itself (headers + body) is fetched but not returned; only
 * the curl_getinfo() array is handed back to the caller.
 *
 * @param string $url Address to request.
 * @return array Transfer information as returned by curl_getinfo().
 */
function getUrl($url)
{
    $connectTimeout = 10; // set to zero for no timeout
    $agent = 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)';

    $handle = curl_init();
    // Options are listed in the same order they were originally applied.
    curl_setopt_array($handle, array(
        CURLOPT_CONNECTTIMEOUT => $connectTimeout,
        CURLOPT_URL            => $url,
        CURLOPT_USERAGENT      => $agent,
        CURLOPT_HEADER         => 1,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => true,
    ));

    // The transfer has to run before curl_getinfo() has anything to report.
    curl_exec($handle);
    $info = curl_getinfo($handle);
    curl_close($handle);

    return $info;
}
/**
 * Look up one hard-coded click-tracking URL through getUrl() and dump the
 * resulting transfer information, followed by an HTML line break.
 */
function get_url_list() {
    $info = getUrl("http://www.webliste.ch/click.aspx?nr=148252");
    print_r($info);
    echo "<br>";
}

// Run the lookup as soon as the script is loaded.
get_url_list();
This results in the following:
Array
(
[url] => http://www.webliste.ch/click.aspx?nr=148252
[content_type] => text/html; charset=iso-8859-1
[http_code] => 200
[header_size] => 320
[request_size] => 139
...
[redirect_time] => 0
[certinfo] => Array
(
)
[redirect_url] =>
)
I am at a loss, because the URL is redirecting, and if I echo $ch, I get the redirected website.
Anyone know what's the cause of this?
The following doesn't work either:
// Query the handle $ch for CURLINFO_EFFECTIVE_URL and keep the answer in $final_url.
// NOTE(review): $ch is not defined in this snippet; presumably this runs inside getUrl() before curl_close() - confirm.
$final_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
The output is the same as $result['url'], which is not what I am looking for.
Upvotes: 1
Views: 1706
Reputation: 26033
I've analyzed what actually happens, and I now see that the redirect is not caused by a redirect header on that page; instead, the page uses JavaScript to instantly submit a form, which redirects you to the start page.
It might be hard to determine the URL of the page in general, but what you can do is look for a <form> tag and then find the URL in its action attribute.
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="de">
<head id="Head1">
<title></title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<meta name="ROBOTS" content="NOINDEX, NOFOLLOW" />
</head>
<body>
<form id="form1" action="http://www.taxiherold.ch">
<div id="panGo" align="center">
<script type="text/javascript">
document.getElementById('form1').submit();
</script>
</div>
</form>
</body>
</html>
So try this code now:
// Fetch the intermediate page itself. Redirect following is switched off
// because the destination is read from the page's <form action="...">,
// not from an HTTP redirect header.
$ch = curl_init('http://www.webliste.ch/click.aspx?nr=148252');
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
$data = curl_exec($ch);
// Release the handle once the transfer is done.
curl_close($ch);

// Stays null when the request failed or the page does not contain exactly
// one <form> directly under <body>.
$url = null;

// curl_exec() returns false on failure; only parse a real, non-empty body.
if (is_string($data) && $data !== '') {
    // Collect HTML parser warnings internally instead of silencing them with "@".
    $previousSetting = libxml_use_internal_errors(true);

    $dom = new DOMDocument();
    $dom->loadHTML($data);

    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    $xpath = new DOMXPath($dom);
    $forms = $xpath->query('//body/form');
    if ($forms->length == 1) {
        $url = $forms->item(0)->getAttribute('action');
    }
}

var_dump($url);
Will output:
Upvotes: 1
Reputation: 3149
This class that I wrote can help you.
It returns all of the header information, such as redirections, etc.
/**
 * Split a raw HTTP header dump into one associative array per header group.
 *
 * Lines are read in order. A line is cut at its first ":" into a name (text
 * before the colon) and a value (everything after it, leading whitespace
 * kept). A line without a colon, such as a status line, is stored under its
 * own text as both key and value. An empty line (or a lone ":") starts the
 * next group, which is how the headers of each response in a redirect chain
 * end up in separate entries. A name repeated inside one group is collected
 * into a list of its values.
 *
 * @param string|array    $response Header text (lines separated by "\r\n") when
 *                                  $String is 1, otherwise an array of lines.
 * @param callable|string $Run      Optional callback invoked as $Run($name, $value)
 *                                  for every line, including separator lines;
 *                                  "" disables it.
 * @param int             $String   1 when $response is a string that must be split.
 * @return array Header groups indexed from 0; a group that received no lines
 *               (apart from group 0) has no entry.
 */
function HeaderProc($response,$Run="",$String=1/*[Is 1 IF Use for String Mode ]*/){
    if($String==1){
        $response=explode("\r\n",$response);
    }
    $PartHeader=0;
    $out=array();
    $out[$PartHeader]=array();
    // foreach replaces list()/each(), which no longer exists in PHP 8.
    foreach($response as $val){
        $val=(string)$val;
        $colon=strpos($val,':');
        if($colon===false){
            // No colon: the whole line acts as both key and stored value.
            $name=$val;
            $value='';
            $stored=$name;
        }else{
            $name=(string)substr($val,0,$colon);
            // substr() can return false at end-of-string on old PHP, hence the cast.
            $value=(string)substr($val,$colon+1);
            $stored=$value;
        }
        if($name===''&&$value===''){
            // Blank separator line: following headers belong to the next group.
            $PartHeader++;
        }elseif(!isset($out[$PartHeader][$name])){
            $out[$PartHeader][$name]=$stored;
        }elseif(is_array($out[$PartHeader][$name])){
            $out[$PartHeader][$name][]=$stored;
        }else{
            // Second occurrence of a name: turn the single value into a list.
            $out[$PartHeader][$name]=array($out[$PartHeader][$name],$stored);
        }
        if($Run!=""){
            $Run($name,$value);
        }
    }
    return $out;
}
/**
 * Small wrapper around PHP's curl functions that performs GET and POST
 * requests with a fixed set of browser-like request headers and returns the
 * parsed response headers, the body, the HTTP status code and the final URL.
 */
class cURL {
    /** Header lines passed to CURLOPT_HTTPHEADER on every request. */
    public $headers = array();
    /** Value passed to CURLOPT_USERAGENT. */
    public $user_agent;
    /** Value passed to CURLOPT_ENCODING. */
    public $compression;
    /** Path used for both CURLOPT_COOKIEFILE and CURLOPT_COOKIEJAR. */
    public $cookie_file;
    /** Proxy passed to CURLOPT_PROXY; ignored when empty. */
    public $proxy;
    /** Result of the most recent CookieAnalysis() call. */
    public $Cookie;
    /** Whether the cookie file options are applied to requests. */
    public $cookies = false;

    /**
     * @param bool   $cookies     Enable the cookie file for requests.
     * @param string $cookie      Path of the cookie file (created when missing).
     * @param string $compression Value for CURLOPT_ENCODING.
     * @param string $proxy       Proxy for CURLOPT_PROXY; '' for none.
     */
    public function __construct($cookies=false,$cookie='cookies.txt',$compression='gzip',$proxy='') {
        $this->headers[] = 'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
        $this->headers[] = 'Accept-Charset:ISO-8859-1,utf-8;q=0.7,*;q=0.3';
        // NOTE(review): this advertises deflate/sdch while CURLOPT_ENCODING is
        // set from $compression ('gzip' by default) - confirm this is intended.
        $this->headers[] = 'Accept-Encoding:gzip,deflate,sdch';
        $this->headers[] = 'Accept-Language:en-US,en;q=0.8';
        $this->headers[] = 'Cache-Control:max-age=0';
        $this->headers[] = 'Connection:keep-alive';
        // CURLOPT_USERAGENT takes only the header's contents, so the value
        // must not carry its own "User-Agent:" prefix.
        $this->user_agent = 'Mozilla/5.0 (SepidarSoft [Organic Search Engine Crawler] Linux Edition) AppleWebKit/536.5 (KHTML, like Gecko) SepidarBrowser/1.0.100.52 Safari/536.5';
        $this->compression=$compression;
        $this->proxy=$proxy;
        $this->cookies=$cookies;
        if ($this->cookies) {
            $this->cookie($cookie);
        }
    }

    /**
     * Convert a Set-Cookie header value into an array of its "name=value"
     * pairs. Every pair terminated by ";" is captured, so attributes such as
     * path or expires appear as keys next to the cookie itself.
     *
     * @param string $Cookie Raw Set-Cookie header value.
     * @return array Trimmed names mapped to trimmed values.
     */
    public function CookieAnalysis($Cookie){//convert str cookie to array cookie
        $this->Cookie=array();
        // The padding guarantees that the last pair is also followed by ";".
        if(preg_match_all("~(.*?)=(.*?);~si",' '.$Cookie.'; ',$pairs,PREG_SET_ORDER)){
            foreach($pairs as $pair){
                $this->Cookie[trim($pair[1])]=trim($pair[2]);
            }
        }
        return $this->Cookie;
    }

    /**
     * Remember the cookie file, creating an empty one when it does not exist.
     * Stops the script through error() when the file cannot be created.
     *
     * @param string $cookie_file Path of the cookie file.
     */
    public function cookie($cookie_file) {
        if (!file_exists($cookie_file)) {
            $handle = fopen($cookie_file,'w');
            if ($handle === false) {
                $this->error('The cookie file could not be opened. Make sure this directory has the correct permissions');
            }
            // Close the handle that was just opened; the path itself is not a resource.
            fclose($handle);
        }
        $this->cookie_file=$cookie_file;
    }

    /**
     * Perform a GET request.
     *
     * @param string $url Address to request.
     * @return array Keys 'Header', 'Body', 'HTTP_State' and 'URL'; 'Error' is
     *               added when the transfer failed.
     */
    public function GET($url) {
        return $this->request($url,false,'');
    }

    /**
     * Perform a POST request.
     *
     * @param string       $url  Address to request.
     * @param string|array $data Value for CURLOPT_POSTFIELDS; defaults to an empty body.
     * @return array Same shape as GET().
     */
    public function POST($url,$data='') {
        return $this->request($url,true,$data);
    }

    /**
     * Shared implementation of GET() and POST().
     *
     * @param string       $url    Address to request.
     * @param bool         $isPost Send $data as a POST body when true.
     * @param string|array $data   POST payload; unused for GET.
     * @return array Parsed response, see GET().
     */
    private function request($url,$isPost,$data) {
        $process = curl_init($url);
        curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
        // Headers are included in the output so they can be split off below.
        curl_setopt($process, CURLOPT_HEADER, 1);
        curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
        if ($this->cookies) {
            curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
            curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
        }
        curl_setopt($process, CURLOPT_ENCODING, $this->compression);
        curl_setopt($process, CURLOPT_TIMEOUT, 30);
        if ($this->proxy) {
            curl_setopt($process, CURLOPT_PROXY, $this->proxy);
        }
        if ($isPost) {
            curl_setopt($process, CURLOPT_POSTFIELDS, $data);
        }
        curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
        if ($isPost) {
            curl_setopt($process, CURLOPT_POST, 1);
        }

        $response = curl_exec($process);
        $failure = null;
        if ($response === false) {
            // Keep the usual result shape on failure and report the reason.
            $failure = curl_error($process);
            $response = '';
        }

        $header_size = curl_getinfo($process,CURLINFO_HEADER_SIZE);
        $result = array();
        $result['Header'] = HeaderProc((string)substr($response, 0, $header_size),'',1);
        foreach($result['Header'] as $HeaderK=>$HeaderP){
            // Only a repeated Set-Cookie header is a list; a single one stays a raw string.
            if(!isset($HeaderP['Set-Cookie']) || !is_array($HeaderP['Set-Cookie'])){
                continue;
            }
            foreach($HeaderP['Set-Cookie'] as $key=>$val){
                $result['Header'][$HeaderK]['Set-Cookie'][$key]=$this->CookieAnalysis($val);
            }
        }
        $result['Body'] = (string)substr($response, $header_size);
        $result['HTTP_State'] = curl_getinfo($process,CURLINFO_HTTP_CODE);
        $result['URL'] = curl_getinfo($process,CURLINFO_EFFECTIVE_URL);
        if ($failure !== null) {
            $result['Error'] = $failure;
        }
        curl_close($process);
        return $result;
    }

    /**
     * Print an HTML error box and stop the script.
     *
     * @param string $error Message shown inside the box.
     */
    public function error($error) {
        echo "<center><div style='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
        die;
    }
}
sample:
// Create the client with its default constructor arguments.
$cc = new cURL();
// POST() takes the request body as its second argument; an empty body is sent here.
print_r( $cc->POST('http://www.domain.com', ''));
For old PHP versions:
/**
 * Parse a raw HTTP header dump into a list of header groups.
 *
 * Each line is split at its first ":"; the text before it becomes the key
 * and the text after it (leading whitespace included) the value. Lines that
 * contain no colon, e.g. the status line, are stored with the line itself as
 * key and value. A blank line (or a line consisting only of ":") advances to
 * the next group, so every response of a redirect chain gets its own entry.
 * When a key occurs more than once in a group its values are gathered in a
 * list.
 *
 * @param string|array    $response "\r\n"-separated header text when $String
 *                                  is 1, otherwise an array of header lines.
 * @param callable|string $Run      Optional callback called as $Run($name, $value)
 *                                  once per line (separator lines included);
 *                                  "" means no callback.
 * @param int             $String   1 if $response still has to be split into lines.
 * @return array Groups indexed from 0; groups after the first exist only if
 *               at least one line was stored in them.
 */
function HeaderProc($response,$Run="",$String=1/*[Is 1 IF Use for String Mode ]*/){
    if($String==1){
        $response=explode("\r\n",$response);
    }
    $PartHeader=0;
    $out=array();
    $out[$PartHeader]=array();
    // Plain foreach works on every PHP version; each() was removed in PHP 8.
    foreach($response as $val){
        $val=(string)$val;
        $colonAt=strpos($val,':');
        if($colonAt===false){
            // Colon-less line: key and stored value are the line itself.
            $name=$val;
            $value='';
            $entry=$name;
        }else{
            $name=(string)substr($val,0,$colonAt);
            // Cast because substr() yields false at end-of-string before PHP 8.
            $value=(string)substr($val,$colonAt+1);
            $entry=$value;
        }
        if($name===''&&$value===''){
            // Separator between two header groups.
            $PartHeader++;
        }elseif(!isset($out[$PartHeader][$name])){
            $out[$PartHeader][$name]=$entry;
        }elseif(is_array($out[$PartHeader][$name])){
            $out[$PartHeader][$name][]=$entry;
        }else{
            // Promote the existing single value to a list on the first repeat.
            $out[$PartHeader][$name]=array($out[$PartHeader][$name],$entry);
        }
        if($Run!=""){
            $Run($name,$value);
        }
    }
    return $out;
}
/**
 * cURL helper for older PHP versions: sends GET and POST requests with a
 * fixed set of browser-like headers and returns the parsed response headers,
 * the body, the HTTP status code and the final URL.
 */
class cURL {
    /** Header lines handed to CURLOPT_HTTPHEADER. */
    public $headers = array();
    /** Value handed to CURLOPT_USERAGENT. */
    public $user_agent;
    /** Value handed to CURLOPT_ENCODING. */
    public $compression;
    /** Path handed to CURLOPT_COOKIEFILE and CURLOPT_COOKIEJAR. */
    public $cookie_file;
    /** Proxy handed to CURLOPT_PROXY; skipped when empty. */
    public $proxy;
    /** Result of the latest CookieAnalysis() call. */
    public $Cookie;
    /** True when requests should use the cookie file. */
    public $cookies = false;

    /**
     * @param bool   $cookies     Use a cookie file for requests.
     * @param string $cookie      Cookie file path (created when missing).
     * @param string $compression Value for CURLOPT_ENCODING.
     * @param string $proxy       Proxy for CURLOPT_PROXY; '' for none.
     */
    public function __construct($cookies=false,$cookie='cookies.txt',$compression='gzip',$proxy='') {
        $this->headers[] = 'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
        $this->headers[] = 'Accept-Charset:ISO-8859-1,utf-8;q=0.7,*;q=0.3';
        // NOTE(review): deflate/sdch are advertised here although
        // CURLOPT_ENCODING comes from $compression - confirm this is intended.
        $this->headers[] = 'Accept-Encoding:gzip,deflate,sdch';
        $this->headers[] = 'Accept-Language:en-US,en;q=0.8';
        $this->headers[] = 'Cache-Control:max-age=0';
        $this->headers[] = 'Connection:keep-alive';
        // Only the header contents belong in CURLOPT_USERAGENT, so the value
        // has no "User-Agent:" prefix of its own.
        $this->user_agent = 'Mozilla/5.0 (SepidarSoft [Organic Search Engine Crawler] Linux Edition) AppleWebKit/536.5 (KHTML, like Gecko) SepidarBrowser/1.0.100.52 Safari/536.5';
        $this->compression=$compression;
        $this->proxy=$proxy;
        $this->cookies=$cookies;
        if ($this->cookies) {
            $this->cookie($cookie);
        }
    }

    /**
     * Extract the first "name=value" pair of a Set-Cookie header value, i.e.
     * the cookie itself without attributes such as path or expires.
     *
     * @param string $Cookie Raw Set-Cookie header value.
     * @return array One trimmed name => trimmed value entry, or an empty
     *               array when the value contains no "name=value" pair.
     */
    public function CookieAnalysis($Cookie){//convert str cookie to array cookie
        $this->Cookie=array();
        // Padding makes sure even a lone pair is terminated by ";".
        if(preg_match("~(.*?)=(.*?);~si",' '.$Cookie.'; ',$M)){
            $this->Cookie[trim($M[1])]=trim($M[2]);
        }
        return $this->Cookie;
    }

    /**
     * Store the cookie file path, creating an empty file first when needed.
     * Ends the script via error() when the file cannot be created.
     *
     * @param string $cookie_file Cookie file path.
     */
    public function cookie($cookie_file) {
        if (!file_exists($cookie_file)) {
            $fp = fopen($cookie_file,'w');
            if ($fp === false) {
                $this->error('The cookie file could not be opened. Make sure this directory has the correct permissions');
            }
            // Close the freshly opened handle rather than the path string.
            fclose($fp);
        }
        $this->cookie_file=$cookie_file;
    }

    /**
     * Send a GET request.
     *
     * @param string $url Address to request.
     * @return array Keys 'Header', 'Body', 'HTTP_State', 'URL', plus 'Error'
     *               when the transfer failed.
     */
    public function GET($url) {
        return $this->send($url,false,'');
    }

    /**
     * Send a POST request.
     *
     * @param string       $url  Address to request.
     * @param string|array $data Value for CURLOPT_POSTFIELDS; empty body by default.
     * @return array Same shape as GET().
     */
    public function POST($url,$data='') {
        return $this->send($url,true,$data);
    }

    /**
     * Common request routine behind GET() and POST().
     *
     * @param string       $url      Address to request.
     * @param bool         $withBody True to send $data as a POST body.
     * @param string|array $data     POST payload; ignored for GET.
     * @return array Parsed response, see GET().
     */
    private function send($url,$withBody,$data) {
        $process = curl_init($url);
        curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
        // Response headers are requested inline and cut off the body below.
        curl_setopt($process, CURLOPT_HEADER, 1);
        curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
        if ($this->cookies) {
            curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
            curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
        }
        curl_setopt($process, CURLOPT_ENCODING, $this->compression);
        curl_setopt($process, CURLOPT_TIMEOUT, 30);
        if ($this->proxy) {
            curl_setopt($process, CURLOPT_PROXY, $this->proxy);
        }
        if ($withBody) {
            curl_setopt($process, CURLOPT_POSTFIELDS, $data);
        }
        curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
        if ($withBody) {
            curl_setopt($process, CURLOPT_POST, 1);
        }

        $response = curl_exec($process);
        $failure = null;
        if ($response === false) {
            // Report the failure but still return the regular result keys.
            $failure = curl_error($process);
            $response = '';
        }

        $header_size = curl_getinfo($process,CURLINFO_HEADER_SIZE);
        $result = array();
        $result['Header'] = HeaderProc((string)substr($response, 0, $header_size),'',1);
        foreach($result['Header'] as $HeaderK=>$HeaderP){
            // A single Set-Cookie header is left as a raw string; only lists are parsed.
            if(!isset($HeaderP['Set-Cookie']) || !is_array($HeaderP['Set-Cookie'])){
                continue;
            }
            foreach($HeaderP['Set-Cookie'] as $key=>$val){
                $result['Header'][$HeaderK]['Set-Cookie'][$key]=$this->CookieAnalysis($val);
            }
        }
        $result['Body'] = (string)substr($response, $header_size);
        $result['HTTP_State'] = curl_getinfo($process,CURLINFO_HTTP_CODE);
        $result['URL'] = curl_getinfo($process,CURLINFO_EFFECTIVE_URL);
        if ($failure !== null) {
            $result['Error'] = $failure;
        }
        curl_close($process);
        return $result;
    }

    /**
     * Output an HTML error box and terminate the script.
     *
     * @param string $error Message displayed in the box.
     */
    public function error($error) {
        echo "<center><div style='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
        die;
    }
}
sample:
// Build the client with its default constructor arguments.
$cc = new cURL();
// The second argument of POST() is the request body; it is left empty in this example.
print_r( $cc->POST('http://www.domain.com', ''));
Yahoo site sample header output
[Header] => Array
(
[0] => Array
(
[HTTP/1.1 302 Found] => HTTP/1.1 302 Found
[Date] => Sat, 02 Mar 2013 14:37:19 GMT
[P3P] => policyref="http://info.yahoo.com/w3c/p3p.xml", CP="CAO DSP COR CUR ADM DEV TAI PSA PSD IVAi IVDi CONi TELo OTPi OUR DELi SAMi OTRi UNRi PUBi IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC GOV"
[Cache-Control] => private
[X-Frame-Options] => SAMEORIGIN
[Set-Cookie] => fpc=d=a2polPzlISX4q5OZQBxq.CKduGwG2Wm1YrPD59ENCUl3uTzrs.8HlnpJROO8MWa6M.B8e1JuCsbW25qwqY5zEs.mA0_EVlAVPMhFCdfCxhZf6vWmmqpPm9bVzGYs8Y7IyTG7IFp9p0MN_FPQmzNM7I8XBu4iGCI8MbHWFvOMKmhN9MTkPC4KbNJ2izSK9xBXTedDnYw-&v=2; expires=Sun, 02-Mar-2014 14:37:19 GMT; path=/; domain=www.yahoo.com
[Location] => http://en-maktoob.yahoo.com/?p=us
[Vary] => Accept-Encoding
[Content-Type] => text/html; charset=utf-8
[Age] => 0
[Transfer-Encoding] => chunked
[Connection] => keep-alive
[Server] => YTS/1.20.13
)
[1] => Array
(
[HTTP/1.1 200 OK] => HTTP/1.1 200 OK
[Date] => Sat, 02 Mar 2013 14:37:20 GMT
[P3P] => policyref="http://info.yahoo.com/w3c/p3p.xml", CP="CAO DSP COR CUR ADM DEV TAI PSA PSD IVAi IVDi CONi TELo OTPi OUR DELi SAMi OTRi UNRi PUBi IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC GOV"
[Cache-Control] => private
[X-Frame-Options] => SAMEORIGIN
[Set-Cookie] => Array
(
[0] => Array
(
[IU] => deleted
)
[1] => Array
(
[PH] => deleted
)
[2] => Array
(
[MSC] => t=1362235040X
)
[3] => Array
(
[fpc] => d=_7tfRPjaISWhpxKrzORZ47ywABwHrUd0vF3WBQH9UYD6KMC7fyjTBdcMMh1FYiufGwiXnhHgDV9gK_VrwVf.q.n_MoJj3B4OMV5Lw42TXrYN_xGhwsnsyUPvQTy79LJ.twkY0IQ3culhr0osKxe0MvGIPSRcYDWH13TUS5YhrnIP731WRyEDZlPh2gPUXxNc1nRtr7Y-&v=2
)
[4] => Array
(
[fpms] => p_30345347=%7B%22loc%22%3A%7B%22id%22%3A1940330%2C%22city%22%3A%22Abu+Dhabi%22%2C%22state%22%3A%22%22%2C%22country%22%3A%22UAE%22%7D%7D
)
[5] => Array
(
[fpc] => d=SaiDIsbaISXJV8ztcJqpafzGA13Lsq0TPQ7HJOn_.yLYWvNZF75ELqLKTLekfVYxmFj0OxOH_thzdIa9UNQIiwYXt99qJ8HNsqpWubAPIFaO1o36VbPBUz9Qu0Rzgzh6Qh.rJQnhPnj1m3NMeFlpYZ7kpVAsjL88RMdcGP82RMUEENd9mWXC7SkuY_CIR76Ne3pEgotZlVDVMABYyxJbM4N4jqG5zkC23Gy8epD4JzxcUuTWDyUn.LZaIqX1Gn6Fcn_f6de3&v=2
)
[6] => Array
(
[fpps] => _page=%7B%22wsid%22%3A%2221445690%22%7D
)
[7] => Array
(
[fpt] => d=Zc7DH53Xe9za_cphyvUoTpFDnmIFF977Sv9yIyBJtqtpcN4aLM18CC3FKuMd6AMXylr7FJjRBtWkJYiIdmrER9MPUOFt22FcF8rNk8Lu_kQMbAEra9CnHEhP0N8DVz6iKlRji6wGv_.3pOxmx_7Td1bq2D4RtVTE93P1kVGFgxlSV7Vtdf8JUxoRTq3dMKZuNQD5vY76rjiXf64lrQ89ONTWEpCGE3MxGVHnegZ71MiuKmYPLxH.AdNFzgw_EoD5QFWyxBxC3GNq7CarXzwJ5D4Uoiw690kzihlRQ66UgGj6sAdIIB_haiXQ6pJ7Q_w86gen6FBolLLiIBrDaujASks1fNzrWOfSH7HDn3GfqcCycIXcJDw_Xb8eGBgJVZFK2yuM0BF68NOW.nkACke1I.ufHsJXrvZH51Pg4dh9hMIsqeI-&v=1
)
[8] => Array
(
[fpc_s] => d=jbVQS4TaISWRQmb4Qu6ANMqdtfYe_QawTKJ.rdl.9vdhjLe6UHD_z3Pvh2HhUHGn2i4oPThLzibGfAmid4zCCnYjxdTbby8pCY566kgiSjnvroDbRszWKfTL4j8Bew5x1VnLUqLfpKWUq2jwAOj1WdBhiSajBzp_hg.8q8O1M0XO.hd7YXRtm66BnbOtcTli3arG1nfT96JakB5i8cyNrUMl1m4czoVB7MqJDipKCfQ.19r98RG0dJELW.fFXfry0AApcU8cweMqTTIuks1LAeVRngCAX7eRfB0eknd5DOqTpZlrMTmW.JjNnbI-&v=2
)
)
[Vary] => Accept-Encoding
[Content-Type] => text/html;charset=utf-8
[Content-Encoding] => gzip
[Age] => 0
[Transfer-Encoding] => chunked
[Connection] => keep-alive
[Server] => YTS/1.20.13
)
)
Upvotes: 0