Thank you very much for your code.
It helped me create my own script to read the pages I need. I had never programmed in PHP before, but with your code and the wisdom of the internet I was able to adapt your script to my needs.
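PHP (the listing below ends with `?>` but its opening tag is missing; put `<?php` on the first line of the file):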
// Let browser JavaScript from any origin call this proxy (CORS).
header('Access-Control-Allow-Origin: *'); //all

// Page to fetch. A missing or non-string parameter becomes '' and is rejected below.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : '';

// The only host this proxy is allowed to fetch from.
$allowedHost = 'www.xxxx.yy';

// Validate the parsed scheme and host rather than a fixed-length string prefix.
// A prefix test also accepts look-alike hosts such as
// "https://www.xxxx.yy.evil.example/", and the previous hard-coded length (25)
// did not match the 19-character literal, so only the bare origin could pass.
// Backslashes, whitespace and NUL bytes are refused up front because URL
// parsers may disagree on how to interpret them.
$urlParts = parse_url($url);
if (
    strpbrk($url, "\\ \t\r\n\0") !== false
    || !is_array($urlParts)
    || !isset($urlParts['scheme'], $urlParts['host'])
    || strtolower($urlParts['scheme']) !== 'https'
    || strtolower($urlParts['host']) !== $allowedHost
) {
    echo "Only https://www.xxxx.yy allowed!";
    return;
}

// XPath expression selecting the nodes to return ('' when the parameter is absent).
// NOTE: this is still caller-controlled input; it is only ever used as an XPath
// query against the fetched document.
$xpathQuery = (isset($_GET['xpath']) && is_string($_GET['xpath'])) ? $_GET['xpath'] : '';
/**
 * Fetch a URL with cURL and return the raw response body.
 *
 * Cookies are read from and written to "cookiemon.txt" (a path relative to the
 * working directory). The caller's User-Agent header is forwarded when the
 * request carries one. Redirects are not followed, so the request only ever
 * goes to the URL that was passed in.
 *
 * @param string $target_url URL to request.
 * @return string|false Response body, or false when the transfer fails.
 */
function check($target_url){
    $check = curl_init();
    curl_setopt($check, CURLOPT_COOKIEJAR, 'cookiemon.txt');
    curl_setopt($check, CURLOPT_COOKIEFILE, 'cookiemon.txt');
    // CURLOPT_TIMEOUT is expressed in seconds. The previous value of 40000
    // (roughly 11 hours) looks like a milliseconds figure, so use 40 seconds.
    curl_setopt($check, CURLOPT_TIMEOUT, 40);
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($check, CURLOPT_RETURNTRANSFER, TRUE);
    curl_setopt($check, CURLOPT_URL, $target_url);
    // The header is absent for some clients; only forward it when present.
    if (isset($_SERVER['HTTP_USER_AGENT'])) {
        curl_setopt($check, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
    }
    curl_setopt($check, CURLOPT_FOLLOWLOCATION, false);
    $tmp = curl_exec($check);
    curl_close($check);
    return $tmp;
}
// Fetch the page, apply the XPath filter and answer with JSON.
// json structure:
// {html: "xxxx"}            on success
// {html: "", error: "..."}  when something went wrong
$renderedHtml = '';
$error = null;

// get html
$html = check($url);
if (!is_string($html) || $html === '') {
    // check() hands back curl_exec()'s result, which is false when the transfer
    // fails; DOMDocument::loadHTML() cannot work with empty input either.
    $error = 'Could not fetch the requested page.';
} elseif (!is_string($xpathQuery) || $xpathQuery === '') {
    $error = 'Missing xpath parameter.';
}

if ($error === null) {
    // Collect libxml parser diagnostics internally instead of silencing them
    // with "@"; real-world HTML routinely triggers parser warnings.
    $previousErrorMode = libxml_use_internal_errors(true);

    $dom = new DOMDocument();
    if (!$dom->loadHTML($html)) {
        $error = 'Could not parse the fetched page.';
    } else {
        // apply xpath filter
        $xpath = new DOMXPath($dom);
        $elements = $xpath->query($xpathQuery);
        if ($elements === false) {
            // query() returns false for a malformed expression.
            $error = 'Invalid xpath expression.';
        } else {
            try {
                $temp_dom = new DOMDocument();
                foreach ($elements as $n) {
                    $copy = $temp_dom->importNode($n, true);
                    if ($copy === false) {
                        // Node could not be imported; skip it.
                        continue;
                    }
                    $temp_dom->appendChild($copy);
                }
                $out = $temp_dom->saveHTML();
                $renderedHtml = ($out === false) ? '' : $out;
            } catch (DOMException $e) {
                // Raised when a matched node cannot be placed directly under a
                // document (for example an attribute node).
                $error = 'The xpath result could not be rendered as HTML.';
            }
        }
    }

    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);
}

// return html in json response
$post_data = array(
    'html' => $renderedHtml
);
if ($error !== null) {
    $post_data['error'] = $error;
}

$json = json_encode($post_data);
if ($json === false) {
    // json_encode() returns false when the payload cannot be encoded
    // (e.g. malformed UTF-8 in the fetched markup).
    $json = json_encode(array(
        'html' => '',
        'error' => 'Could not encode the response as JSON.'
    ));
}

header('Content-Type: application/json');
echo $json;
?>
JavaScript
// Ask the PHP proxy for the page at `url` (a variable the surrounding code must
// define) and receive the nodes matched by `xpath` as JSON: {html: "..."}.
$.ajax({
    url: "url of service", // placeholder: address of the PHP script
    dataType: "json",
    data: {
        url: url,
        xpath: "//*"
    },
    type: 'GET',
    success: function (response) {
        // response.html holds the markup returned by the proxy.
    },
    error: function (jqXHR, textStatus, errorThrown) {
        // Surface failures instead of silently ignoring them.
        console.error("Proxy request failed:", textStatus, errorThrown);
    }
});