Scraping data from another website has become very popular in recent years. However, you should make sure that you have permission from the website that you want to scrape before you do this. With that said, here’s an example of how to collect the URLs of all the images on a web page – how you choose to process them (for example, downloading and saving the files) is up to you:
view source
print?
<?php
// Collects the URL of every <img> element on a page into $web_src_arr.
//
// Input:   $website_url - the site to fetch. The scheme is optional; http://
//                         is assumed when it is missing (cURL's own default).
// Outputs: $web_src_arr - list of image URLs in document order. Relative
//                         sources are made absolute against the site root.
//          $web_pic_arr - initialised to an empty array; not filled here.

$website_url = 'www.somewebsite.com';

// Relative image paths are joined onto this, so it must carry a scheme and
// must not end in a slash. NOTE: this treats $website_url as the site root;
// if you point it at a page inside a sub-directory, relative paths will be
// resolved against that full URL rather than its directory.
$base_url = preg_match('#^https?://#i', $website_url)
    ? $website_url
    : 'http://' . $website_url;
$base_url = rtrim($base_url, '/');

// Turns one raw src attribute into a usable URL, or null if there is none.
// A closure (not a named function) so the snippet can be pasted more than
// once into the same script without a redeclaration error.
$resolve_img_src = function ($src, $base_url) {
    $src = trim($src);

    // <img> without a usable src: nothing to collect.
    if ($src === '') {
        return null;
    }

    // Already absolute (http:, https:, data:, ...): keep untouched.
    if (preg_match('#^[a-z][a-z0-9+.-]*:#i', $src)) {
        return $src;
    }

    // Protocol-relative (//cdn.example.com/a.png): borrow the page's scheme.
    if (strpos($src, '//') === 0) {
        return parse_url($base_url, PHP_URL_SCHEME) . ':' . $src;
    }

    // Root-relative or plain relative path: hang it off the site root with
    // exactly one slash in between.
    return $base_url . '/' . ltrim($src, '/');
};

// Fetch the page. $html stays false if cURL cannot be initialised or the
// request fails (DNS error, timeout, ...).
$html = false;
$curl = curl_init($base_url);
if ($curl !== false) {
    curl_setopt($curl, CURLOPT_AUTOREFERER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_TIMEOUT, 2);
    $html = curl_exec($curl);
    curl_close($curl);
}

$dom = new DOMDocument;
if (is_string($html) && trim($html) !== '') {
    // Real-world HTML is rarely valid; collect libxml's complaints internally
    // instead of silencing every error with @, then restore the old setting.
    $previous_libxml_setting = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml_setting);
}

// On a failed fetch $dom is empty, so this is simply an empty node list.
$links = $dom->getElementsByTagName('img');

$web_pic_arr = [];
$web_src_arr = [];

foreach ($links as $link) {
    $img_url = $resolve_img_src($link->getAttribute('src'), $base_url);
    if ($img_url !== null) {
        $web_src_arr[] = $img_url;
    }
} // end foreach loop

// you can write a function to process the data however you wish
// here's an example of calling a function that would save the images
// save_images($web_src_arr, $dest, $minWidth, $minHeight);
?>
Source: http://www.joeydigital.net/source_code/php/scrape-images-from-another-website-with-php/
view source
print?
<?php
// Collects the URL of every <img> element on a page into $web_src_arr.
//
// Input:   $website_url - the site to fetch. The scheme is optional; http://
//                         is assumed when it is missing (cURL's own default).
// Outputs: $web_src_arr - list of image URLs in document order. Relative
//                         sources are made absolute against the site root.
//          $web_pic_arr - initialised to an empty array; not filled here.

$website_url = 'www.somewebsite.com';

// Relative image paths are joined onto this, so it must carry a scheme and
// must not end in a slash. NOTE: this treats $website_url as the site root;
// if you point it at a page inside a sub-directory, relative paths will be
// resolved against that full URL rather than its directory.
$base_url = preg_match('#^https?://#i', $website_url)
    ? $website_url
    : 'http://' . $website_url;
$base_url = rtrim($base_url, '/');

// Turns one raw src attribute into a usable URL, or null if there is none.
// A closure (not a named function) so the snippet can be pasted more than
// once into the same script without a redeclaration error.
$resolve_img_src = function ($src, $base_url) {
    $src = trim($src);

    // <img> without a usable src: nothing to collect.
    if ($src === '') {
        return null;
    }

    // Already absolute (http:, https:, data:, ...): keep untouched.
    if (preg_match('#^[a-z][a-z0-9+.-]*:#i', $src)) {
        return $src;
    }

    // Protocol-relative (//cdn.example.com/a.png): borrow the page's scheme.
    if (strpos($src, '//') === 0) {
        return parse_url($base_url, PHP_URL_SCHEME) . ':' . $src;
    }

    // Root-relative or plain relative path: hang it off the site root with
    // exactly one slash in between.
    return $base_url . '/' . ltrim($src, '/');
};

// Fetch the page. $html stays false if cURL cannot be initialised or the
// request fails (DNS error, timeout, ...).
$html = false;
$curl = curl_init($base_url);
if ($curl !== false) {
    curl_setopt($curl, CURLOPT_AUTOREFERER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_TIMEOUT, 2);
    $html = curl_exec($curl);
    curl_close($curl);
}

$dom = new DOMDocument;
if (is_string($html) && trim($html) !== '') {
    // Real-world HTML is rarely valid; collect libxml's complaints internally
    // instead of silencing every error with @, then restore the old setting.
    $previous_libxml_setting = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml_setting);
}

// On a failed fetch $dom is empty, so this is simply an empty node list.
$links = $dom->getElementsByTagName('img');

$web_pic_arr = [];
$web_src_arr = [];

foreach ($links as $link) {
    $img_url = $resolve_img_src($link->getAttribute('src'), $base_url);
    if ($img_url !== null) {
        $web_src_arr[] = $img_url;
    }
} // end foreach loop

// you can write a function to process the data however you wish
// here's an example of calling a function that would save the images
// save_images($web_src_arr, $dest, $minWidth, $minHeight);
?>
Source: http://www.joeydigital.net/source_code/php/scrape-images-from-another-website-with-php/
No comments:
Post a Comment