1: <?php
2:
3: /*************************************************
4:
5: Snoopy - the PHP net client
6: Author: Monte Ohrt <monte@ispi.net>
7: Copyright (c): 1999-2008 New Digital Group, all rights reserved
8: Version: 1.2.4
9:
10: * This library is free software; you can redistribute it and/or
11: * modify it under the terms of the GNU Lesser General Public
12: * License as published by the Free Software Foundation; either
13: * version 2.1 of the License, or (at your option) any later version.
14: *
15: * This library is distributed in the hope that it will be useful,
16: * but WITHOUT ANY WARRANTY; without even the implied warranty of
17: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18: * Lesser General Public License for more details.
19: *
20: * You should have received a copy of the GNU Lesser General Public
21: * License along with this library; if not, write to the Free Software
22: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23:
24: You may contact the author of Snoopy by e-mail at:
25: monte@ohrt.com
26:
27: The latest version of Snoopy can be obtained from:
28: http://snoopy.sourceforge.net/
29:
30: *************************************************/
31:
32: class Snoopy
33: {
/**** Public variables ****/

/* user definable vars */

var $host = "www.php.net"; // host name we are connecting to; fetch()/submit()
                           // overwrite it with the host of the requested URL
var $port = 80; // port we are connecting to; only overwritten when the
                // requested URL names a port explicitly
var $proxy_host = ""; // proxy host to use
var $proxy_port = ""; // proxy port to use
var $proxy_user = ""; // proxy user to use; when non-empty a Basic
                      // Proxy-Authorization header is sent
var $proxy_pass = ""; // proxy password to use

var $agent = "Snoopy v1.2.4"; // agent we masquerade as (User-Agent header)
var $referer = ""; // referer info to pass (Referer header)
var $cookies = array(); // array of cookies to pass, name => value;
                        // values are urlencode()d when sent
// $cookies["username"]="joe";
var $rawheaders = array(); // array of raw headers to send, name => value;
                           // a "Host" key replaces the generated Host header
// $rawheaders["Content-type"]="text/html";

var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
var $lastredirectaddr = ""; // address of the last redirect that was followed
var $offsiteok = true; // allows redirection off-site
var $maxframes = 0; // frame content depth maximum. 0 = disallow
var $expandlinks = true; // expand links to fully qualified URLs.
// this only applies to fetchlinks()
// submitlinks(), and submittext()
var $passcookies = true; // pass set cookies back through redirects
// NOTE: this currently does not respect
// dates, domains or paths.

var $user = ""; // user for http authentication; also taken from the
                // user part of the requested URL when present
var $pass = ""; // password for http authentication; also taken from
                // the password part of the requested URL when present

// http accept types (Accept header)
var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

var $results = ""; // where the content is put: a string, or an array of
                   // documents once framed content has been fetched

var $error = ""; // error messages sent here
var $response_code = ""; // raw status line returned from the server
var $headers = array(); // raw header lines returned from the server
var $maxlength = 500000; // max return data length (body); _httprequest()
                         // passes it to fread() as the chunk size
var $read_timeout = 0; // timeout on read operations, in seconds
// supported only since PHP 4 Beta 4
// set to 0 to disable the read timeout
var $timed_out = false; // if a read operation timed out
var $status = 0; // http status code parsed from the status line;
                 // set to -100 when a read times out

var $temp_dir = "/tmp"; // temporary directory that the webserver
// has permission to write to.
// under Windows, this should be C:\temp

var $curl_path = "/usr/local/bin/curl";
// Snoopy will use cURL for fetching
// SSL content if a full system path to
// the cURL binary is supplied here.
// set to false if you do not have
// cURL installed. See http://curl.haxx.se
// for details on installing cURL.
// Snoopy does *not* use the cURL
// library functions built into php,
// as these functions are not stable
// as of this Snoopy release.

/**** Private variables ****/

var $_maxlinelen = 4096; // max line length (headers), the fgets() limit

var $_httpmethod = "GET"; // default http request method, used by fetch()
var $_httpversion = "HTTP/1.0"; // default http request version
var $_submit_method = "POST"; // default submit method, used by submit()
var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
var $_redirectaddr = false; // will be set if page fetched is a redirect
var $_redirectdepth = 0; // increments on an http redirect
var $_frameurls = array(); // frame src urls collected from the last document
var $_framedepth = 0; // increments on frame depth

var $_isproxy = false; // set if using a proxy server; the full URI is then
                       // sent in the request line instead of the path
var $_fp_timeout = 30; // timeout for socket connection
113:
/*======================================================================*\
    Function:   fetch
    Purpose:    fetch the contents of a web page over http (socket) or
                https (external cURL binary), then follow the redirect
                and the frame URLs the request left behind, within the
                maxredirs / maxframes limits
    Input:      $URI    the location of the page to fetch
    Output:     true once a request has been issued; false when the
                connection fails, the scheme is not http/https, or no
                executable cURL binary is configured for https
                $this->results  the output text from the fetch
\*======================================================================*/

function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

    // request target when talking to the origin server directly
    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

    switch (strtolower($scheme))
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            if (!$this->_connect($fp))
                return false;

            // a proxy needs the entire URI, the origin server only the path
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
            break;

        case "https":
            // https is delegated to the cURL binary
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            // a proxy needs the entire URI, the origin server only the path
            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "'.$scheme."\"\n";
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // on-site means http or https on exactly this host; the character
        // after the host must end it, so "host.evil.com" does not qualify
        $onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i', $this->_redirectaddr);

        // only follow redirect if it's on this site, or offsiteok is true
        if ($onsite || $this->offsiteok)
        {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    /* fetch the frame documents collected by the request */
    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy: the nested fetch() calls refill $this->_frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
261:
/*======================================================================*\
    Function:   submit
    Purpose:    submit an http form over http (socket) or https
                (external cURL binary), then follow the redirect and
                the frame URLs the request left behind
    Input:      $URI        the location to post the data
                $formvars   the formvars to use.
                            format: $formvars["var"] = "val";
                $formfiles  an array of files to submit
                            format: $formfiles["var"] = "/dir/filename.ext";
    Output:     true once a request has been issued; false when the
                connection fails, the scheme is not http/https, or no
                executable cURL binary is configured for https
                $this->results  the text output from the post
\*======================================================================*/

function submit($URI, $formvars="", $formfiles="")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

    // request target when talking to the origin server directly
    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

    switch (strtolower($scheme))
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            if (!$this->_connect($fp))
                return false;

            // a proxy needs the entire URI, the origin server only the path
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            $this->_disconnect($fp);
            break;

        case "https":
            // https is delegated to the cURL binary
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            // a proxy needs the entire URI, the origin server only the path
            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "'.$scheme."\"\n";
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // qualify a redirect target that is not already an absolute http(s)
        // URL; absolute targets, including cross-scheme ones, are left alone
        if (!preg_match('~^https?://~i', $this->_redirectaddr))
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme."://".$URI_PARTS["host"]);

        // on-site means http or https on exactly this host; the character
        // after the host must end it, so "host.evil.com" does not qualify
        $onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i', $this->_redirectaddr);

        // only follow redirect if it's on this site, or offsiteok is true
        if ($onsite || $this->offsiteok)
        {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            if (strpos($this->_redirectaddr, "?") > 0)
                $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
            else
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
        }
    }

    /* fetch the frame documents collected by the request */
    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy: the nested fetch() calls refill $this->_frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
427:
/*======================================================================*\
    Function:   fetchlinks
    Purpose:    fetch the links from a web page
    Input:      $URI    where you are fetching from
    Output:     true when the fetch succeeded, false otherwise
                $this->results  an array of the URLs; with framed
                content, one such array per fetched document
\*======================================================================*/

function fetchlinks($URI)
{
    if (!$this->fetch($URI))
        return false;

    // links are relative to the page the last followed redirect led to
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        // framed content: strip and expand each document on its own, so
        // that _expandlinks() always receives a flat list of links
        for ($x = 0; $x < count($this->results); $x++)
        {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}
456:
/*======================================================================*\
    Function:   fetchform
    Purpose:    fetch a web page and keep only its form markup
    Input:      $URI    where you are fetching from
    Output:     true when the fetch succeeded, false otherwise
                $this->results  the resulting html form
\*======================================================================*/

function fetchform($URI)
{
    $fetched = $this->fetch($URI);
    if (!$fetched)
        return false;

    if (!is_array($this->results))
    {
        // single document
        $this->results = $this->_stripform($this->results);
        return true;
    }

    // framed content: reduce every fetched document in turn
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_stripform($this->results[$idx]);
        $idx++;
    }

    return true;
}
483:
484:
/*======================================================================*\
    Function:   fetchtext
    Purpose:    fetch a web page and reduce it to its text, with the
                markup removed
    Input:      $URI    where you are fetching from
    Output:     true when the fetch succeeded, false otherwise
                $this->results  the text from the web page
\*======================================================================*/

function fetchtext($URI)
{
    $fetched = $this->fetch($URI);
    if (!$fetched)
        return false;

    if (!is_array($this->results))
    {
        // single document
        $this->results = $this->_striptext($this->results);
        return true;
    }

    // framed content: reduce every fetched document in turn
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        $idx++;
    }

    return true;
}
508:
/*======================================================================*\
    Function:   submitlinks
    Purpose:    submit a form and collect the links of the response
    Input:      $URI        where you are submitting to
                $formvars   the formvars to use
                $formfiles  an array of files to submit
    Output:     true when the submit succeeded, false otherwise
                $this->results  an array of the links from the post
\*======================================================================*/

function submitlinks($URI, $formvars="", $formfiles="")
{
    $submitted = $this->submit($URI, $formvars, $formfiles);
    if (!$submitted)
        return false;

    // links are relative to the page the last followed redirect led to
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (!is_array($this->results))
    {
        // single document
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
        return true;
    }

    // framed content: process every fetched document in turn
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striplinks($this->results[$idx]);
        if ($this->expandlinks)
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
        $idx++;
    }

    return true;
}
542:
/*======================================================================*\
    Function:   submittext
    Purpose:    submit a form and reduce the response to its text
    Input:      $URI        where you are submitting to
                $formvars   the formvars to use
                $formfiles  an array of files to submit
    Output:     true when the submit succeeded, false otherwise
                $this->results  the text from the web page
\*======================================================================*/

function submittext($URI, $formvars = "", $formfiles = "")
{
    $submitted = $this->submit($URI, $formvars, $formfiles);
    if (!$submitted)
        return false;

    // the base for link expansion is the page the last redirect led to
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (!is_array($this->results))
    {
        // single document
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
        return true;
    }

    // framed content: process every fetched document in turn
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        if ($this->expandlinks)
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
        $idx++;
    }

    return true;
}
576:
577:
578:
/*======================================================================*\
    Function:   set_submit_multipart
    Purpose:    make submit() send its body with the content type
                multipart/form-data
\*======================================================================*/
function set_submit_multipart()
{
    $this->_submit_type = 'multipart/form-data';
}
588:
589:
/*======================================================================*\
    Function:   set_submit_normal
    Purpose:    make submit() send its body with the content type
                application/x-www-form-urlencoded (the default)
\*======================================================================*/
function set_submit_normal()
{
    $this->_submit_type = 'application/x-www-form-urlencoded';
}
599:
600:
601: // XOOPS2 Hack begin
602: // Added on March 4, 2003 by onokazu@xoops.org
/*======================================================================*\
    Function:   set_submit_xml
    Purpose:    make submit() send its body with the content type
                text/xml
\*======================================================================*/
function set_submit_xml()
{
    $this->_submit_type = 'text/xml';
}
612: // XOOPS2 Hack end
613:
614:
615: /*======================================================================*\
616: Private functions
617: \*======================================================================*/
618:
619:
/*======================================================================*\
    Function:   _striplinks
    Purpose:    collect the href targets of the <a> tags of an html
                document
    Input:      $document   document to strip.
    Output:     $match      an array of the links: all quoted href
                            values first, then all unquoted ones;
                            an empty array when there are none
\*======================================================================*/

function _striplinks($document)
{
    preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx",$document,$links);

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds the quoted values, group 3 the unquoted ones
    $match = array();

    foreach ($links[2] as $val)
    {
        if (!empty($val))
            $match[] = $val;
    }

    foreach ($links[3] as $val)
    {
        if (!empty($val))
            $match[] = $val;
    }

    // return the links
    return $match;
}
653:
/*======================================================================*\
    Function:   _stripform
    Purpose:    reduce an html document to its form markup (form,
                input, select, textarea and option tags)
    Input:      $document   document to strip.
    Output:     the matched tags as one string, joined with CRLF
\*======================================================================*/

function _stripform($document)
{
    $found = array();
    preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi", $document, $found);

    // $found[0] holds the full text of every match
    return implode("\r\n", $found[0]);
}
671:
672:
673:
/*======================================================================*\
    Function:   _striptext
    Purpose:    reduce an html document to its text: scripts and tags
                are removed, whitespace after a line break is
                collapsed, and a fixed list of entities is decoded
    Input:      $document   document to strip.
    Output:     $text       the resulting text; decoded characters are
                            single bytes (ISO-8859-1 codes, and 0x80
                            for the euro sign), not UTF-8
\*======================================================================*/

function _striptext($document)
{

    // I didn't use preg eval (//e) since that is only available in PHP 4.0.
    // so, list your entities one by one here. I included some of the
    // more common ones.
    //
    // The patterns are applied one after another to the running result,
    // so &amp; has to come last: decoding it earlier would turn
    // "&amp;lt;" into "&lt;" and then wrongly on into "<".

    $search = array("'<script[^>]*?>.*?</script>'si", // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si", // strip out html tags
        "'([\r\n])[\s]+'", // strip out white space
        "'&(quot|#34|#034|#x22);'i", // replace html entities
        "'&(lt|#60|#060|#x3c);'i", // added hexadecimal values
        "'&(gt|#62|#062|#x3e);'i",
        "'&(nbsp|#160|#xa0);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i",
        "'&(reg|#174);'i",
        "'&(deg|#176);'i",
        "'&(#39|#039|#x27);'",
        "'&(euro|#8364);'i", // europe
        "'&a(uml|UML);'", // german
        "'&o(uml|UML);'",
        "'&u(uml|UML);'",
        "'&A(uml|UML);'",
        "'&O(uml|UML);'",
        "'&U(uml|UML);'",
        "'&szlig;'i",
        "'&(amp|#38|#038|#x26);'i", // must stay last, see above
    );
    $replace = array( "",
        "",
        "\\1",
        "\"",
        "<",
        ">",
        " ",
        chr(161),
        chr(162),
        chr(163),
        chr(169),
        chr(174),
        chr(176),
        chr(39),
        chr(128), // euro sign in Windows-1252
        // single-byte ISO-8859-1 codes of the german umlauts and sharp s
        chr(228),
        chr(246),
        chr(252),
        chr(196),
        chr(214),
        chr(220),
        chr(223),
        "&",
    );

    $text = preg_replace($search,$replace,$document);

    return $text;
}
745:
/*======================================================================*\
    Function:   _expandlinks
    Purpose:    expand each link into a fully qualified URL
    Input:      $links  the links to qualify (a string or an array
                        of strings)
                $URI    the full URI to get the base from
    Output:     $expandedLinks  the expanded links
\*======================================================================*/

function _expandlinks($links,$URI)
{
    // base URI without its query string
    preg_match("/^[^\?]+/",$URI,$match);
    $base = isset($match[0]) ? $match[0] : '';

    // split "scheme://authority" from the path, so that the file-name
    // stripping below can never eat a bare two-label host such as
    // "http://example.com"
    $authority = '';
    $path = $base;
    if (preg_match('~^([a-z][a-z0-9+.-]*://[^/]*)(.*)$~is', $base, $parts))
    {
        $authority = $parts[1];
        $path = $parts[2];
    }

    // drop a trailing "file.ext" segment, then a trailing slash
    $path = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$path);
    $path = preg_replace("|/$|","",$path);

    // $match is the base directory, $match_root the site root
    $match = $authority.$path;
    $match_part = parse_url($match);
    if (!is_array($match_part))
        $match_part = array();
    $match_root =
        (isset($match_part["scheme"]) ? $match_part["scheme"] : "")."://".
        (isset($match_part["host"]) ? $match_part["host"] : "");

    // applied in order to every link:
    //  1. strip "http://<current host>", leaving a site-absolute path
    //  2. prefix site-absolute paths with the site root
    //  3. prefix everything that is still relative with the base
    //     directory; absolute http and https URLs and mailto: links
    //     are left alone
    //  4. collapse "/./"
    //  5. collapse "/dir/../"
    $search = array( "|^http://".preg_quote($this->host)."|i",
        "|^(\/)|i",
        "|^(?!https?://)(?!mailto:)|i",
        "|/\./|",
        "|/[^\/]+/\.\./|"
    );

    $replace = array( "",
        $match_root."/",
        $match."/",
        "/",
        "/"
    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
783:
/*======================================================================*\
    Function:   _httprequest
    Purpose:    send one http request over an open socket and read
                the response: status line and headers go to
                $this->status / response_code / headers, a redirect
                target to $this->_redirectaddr, frame sources to
                $this->_frameurls and the body to $this->results
    Input:      $url            the request target for the request line
                $fp             the current open file pointer
                $URI            the full URI
                $http_method    the request method (GET, POST, ...)
                $content_type   Content-type to send, if any
                $body           body contents to send if any (POST)
    Output:     true when the response was read, false when a read
                timed out ($this->status is then -100)
\*======================================================================*/

function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    $cookie_headers = '';
    // a redirect is being followed: let setcookies() (defined elsewhere
    // in this class) update the cookies before the request is built
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";
    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    // a caller-supplied raw "Host" header takes precedence
    if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: ".$this->host;
        // when port is 80, port no should not be assigned
        if(!empty($this->port) && $this->port != 80)
            $headers .= ":".$this->port;
        $headers .= "\r\n";
    }
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";
    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        reset($this->cookies);
        if ( count($this->cookies) > 0 ) {
            $cookie_headers .= 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers,0,-2) . "\r\n";
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach always walks the whole array, so the raw headers are
        // also sent on the follow-up requests of redirects and frames
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    //add proxy auth headers
    if(!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";


    // blank line ends the header block
    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    // start from a clean slate for this response
    $this->_redirectaddr = false;
    $this->headers = array();

    while($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // blank line: end of the response headers
        if($currentHeader == "\r\n")
            break;

        // if a header begins with Location: or URI:, set the redirect;
        // the whitespace after the colon is optional
        if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($currentHeader),$matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        // status line: keep the numeric code and the raw line
        if(preg_match("|^HTTP/|",$currentHeader))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body in chunks of $this->maxlength until nothing is left
    $results = '';
    do {
        $_data = fread($fp, $this->maxlength);
        if (strlen($_data) == 0) {
            break;
        }
        $results .= $_data;
    } while(true);

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a a redirect meta tag

    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))

    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
951:
952: /*======================================================================*\
953: Function: _httpsrequest
954: Purpose: go get the https data from the server using curl
955: Input: $url the url to fetch
956: $URI the full URI
957: $body body contents to send if any (POST)
958: Output:
959: \*======================================================================*/
960:
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
	/*
	 * Fetch $URI by running the external curl binary ($this->curl_path) and
	 * store the outcome on the object.
	 *
	 * $url           path of the request; only defaulted to "/" here, curl is
	 *                given the full $URI instead
	 * $URI           the full URI passed to curl
	 * $http_method   not placed on the command line (no request line is built
	 *                for curl)
	 * $content_type  value for the Content-type header, if any
	 * $body          request body handed to curl with -d, if any
	 *
	 * Returns true on success. Returns false and sets $this->error when the
	 * temporary header file cannot be created or curl exits non-zero.
	 * On success sets $this->headers, $this->response_code,
	 * $this->_redirectaddr, $this->results and (for frames) $this->_frameurls.
	 */

	if($this->passcookies && $this->_redirectaddr)
		$this->setcookies();

	$headers = array();
	$cmdline_params = "";

	$URI_PARTS = parse_url($URI);
	if(empty($url))
		$url = "/";

	// a request line is not needed: curl builds it from the URI
	if(!empty($this->agent))
		$headers[] = "User-Agent: ".$this->agent;
	if(!empty($this->host))
	{
		// the port is only appended to the Host header when it is not 80
		if(!empty($this->port) && $this->port != 80)
			$headers[] = "Host: ".$this->host.":".$this->port;
		else
			$headers[] = "Host: ".$this->host;
	}
	if(!empty($this->accept))
		$headers[] = "Accept: ".$this->accept;
	if(!empty($this->referer))
		$headers[] = "Referer: ".$this->referer;
	if(!empty($this->cookies))
	{
		if(!is_array($this->cookies))
			$this->cookies = (array)$this->cookies;

		$cookie_pairs = array();
		foreach($this->cookies as $cookieKey => $cookieVal)
			$cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
		if(count($cookie_pairs) > 0)
			$headers[] = "Cookie: ".implode("; ",$cookie_pairs);
	}
	if(!empty($this->rawheaders))
	{
		if(!is_array($this->rawheaders))
			$this->rawheaders = (array)$this->rawheaders;
		// foreach always starts at the first element, so repeated requests
		// on the same object send the raw headers every time
		foreach($this->rawheaders as $headerKey => $headerVal)
			$headers[] = $headerKey.": ".$headerVal;
	}
	if(!empty($content_type)) {
		if ($content_type == "multipart/form-data")
			$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
		else
			$headers[] = "Content-type: $content_type";
	}
	if(!empty($body))
		$headers[] = "Content-length: ".strlen($body);
	if(!empty($this->user) || !empty($this->pass))
		$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

	// every value that reaches the shell is passed through escapeshellarg()
	// so header, body and URI contents cannot break out of their argument
	foreach($headers as $header)
		$cmdline_params .= " -H ".escapeshellarg($header);

	if(!empty($body))
		$cmdline_params .= " -d ".escapeshellarg($body);

	if($this->read_timeout > 0)
		$cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

	// use the configured temp dir when the object has one, otherwise the
	// system default
	if(!empty($this->temp_dir))
		$temp_dir = $this->temp_dir;
	elseif(function_exists("sys_get_temp_dir"))
		$temp_dir = sys_get_temp_dir();
	else
		$temp_dir = "";

	$headerfile = tempnam($temp_dir, "sno");
	if($headerfile === false)
	{
		$this->error = "Error: could not create a temporary file for the response headers.";
		return false;
	}

	// NOTE(review): -k makes curl skip TLS certificate verification; kept
	// because callers may rely on it, but it should be reconsidered.
	$results = array();
	$return = 0;
	exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

	if($return)
	{
		// do not leave the temporary header file behind on failure
		if(file_exists($headerfile))
			unlink($headerfile);
		$this->error = "Error: cURL could not retrieve the document, error $return.";
		return false;
	}

	$results = implode("\r\n",$results);

	$result_headers = file($headerfile);
	unlink($headerfile);
	if(!is_array($result_headers))
		$result_headers = array();

	$this->_redirectaddr = false;
	// keep headers an array so later count()/foreach on it stay valid
	$this->headers = array();

	foreach($result_headers as $result_header)
	{
		// if a header begins with Location: or URI:, set the redirect
		if(preg_match("/^(Location:|URI:)\s+(.*)/i",chop($result_header),$matches))
		{
			// look for :// in the Location header to see if hostname is included
			if(!preg_match("|\:\/\/|",$matches[2]))
			{
				// no host in the path, so prepend
				$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
				// eliminate double slash
				if(!preg_match("|^/|",$matches[2]))
					$this->_redirectaddr .= "/".$matches[2];
				else
					$this->_redirectaddr .= $matches[2];
			}
			else
				$this->_redirectaddr = $matches[2];
		}

		if(preg_match("|^HTTP/|",$result_header))
			$this->response_code = $result_header;

		$this->headers[] = $result_header;
	}

	// check if there is a redirect meta tag
	if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
	{
		$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
	}

	// have we hit our frame depth and is there frame src to fetch?
	if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
	{
		// appending with [] to a string is an error, so start a list first
		if(!is_array($this->results))
			$this->results = array();
		$this->results[] = $results;
		foreach($match[1] as $frame_src)
			$this->_frameurls[] = $this->_expandlinks($frame_src,$URI_PARTS["scheme"]."://".$this->host);
	}
	// have we already fetched framed content?
	elseif(is_array($this->results))
		$this->results[] = $results;
	// no framed content
	else
		$this->results = $results;

	return true;
}
1103:
1104: /*======================================================================*\
1105: Function: setcookies()
1106: Purpose: set cookies for a redirection
1107: \*======================================================================*/
1108:
function setcookies()
{
	// Copies every "Set-Cookie: name=value" found in $this->headers into
	// $this->cookies (value url-decoded) so it is sent on the next request.

	// the request methods unset()/rebuild $this->headers, so it may not be
	// an array here; in that case there is nothing to collect
	if(!is_array($this->headers))
		return;

	foreach($this->headers as $header_line)
	{
		if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header_line, $match))
			$this->cookies[$match[1]] = urldecode($match[2]);
	}
}
1117:
1118:
1119: /*======================================================================*\
1120: Function: _check_timeout
1121: Purpose: checks whether timeout has occurred
1122: Input: $fp file pointer
1123: \*======================================================================*/
1124:
function _check_timeout($fp)
{
	// Reports whether the stream $fp has timed out; when it has,
	// $this->timed_out is set to true as well.

	// without a positive read timeout the stream is never inspected
	if (!($this->read_timeout > 0))
		return false;

	$stream_info = socket_get_status($fp);
	if (!$stream_info["timed_out"])
		return false;

	$this->timed_out = true;
	return true;
}
1136:
1137: /*======================================================================*\
1138: Function: _connect
1139: Purpose: make a socket connection
1140: Input: $fp file pointer
1141: \*======================================================================*/
1142:
function _connect(&$fp)
{
	/*
	 * Open a socket with fsockopen() and hand it back through $fp.
	 *
	 * When both proxy_host and proxy_port are set the connection goes to the
	 * proxy (and $this->_isproxy is set to true), otherwise to
	 * $this->host:$this->port.
	 *
	 * Returns true on success. On failure returns false, stores the error
	 * number in $this->status and a message in $this->error.
	 */
	if(!empty($this->proxy_host) && !empty($this->proxy_port))
	{
		$this->_isproxy = true;

		$host = $this->proxy_host;
		$port = $this->proxy_port;
	}
	else
	{
		$host = $this->host;
		$port = $this->port;
	}

	$this->status = 0;

	$fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
	if($fp)
	{
		// socket connection succeeded
		return true;
	}

	// socket connection failed
	$this->status = $errno;
	switch($errno)
	{
		// each case must break, otherwise the default message below
		// overwrites the specific one
		case -3:
			$this->error="socket creation failed (-3)";
			break;
		case -4:
			$this->error="dns lookup failure (-4)";
			break;
		case -5:
			$this->error="connection refused or timed out (-5)";
			break;
		default:
			$this->error="connection failed (".$errno.")";
			break;
	}
	return false;
}
1190: /*======================================================================*\
1191: Function: _disconnect
1192: Purpose: disconnect a socket connection
1193: Input: $fp file pointer
1194: \*======================================================================*/
1195:
function _disconnect($fp)
{
	// close the connection and pass fclose()'s result back to the caller
	$closed = fclose($fp);
	return $closed;
}
1200:
1201:
1202: /*======================================================================*\
1203: Function: _prepare_post_body
1204: Purpose: Prepare post body according to encoding type
1205: Input: $formvars - form variables
1206: $formfiles - form upload files
1207: Output: post body
1208: \*======================================================================*/
1209:
function _prepare_post_body($formvars, $formfiles)
{
	/*
	 * Build the request body for a form submission according to
	 * $this->_submit_type.
	 *
	 * $formvars   form variables (name => value, or name => list of values)
	 * $formfiles  upload files (field name => file name or list of file
	 *             names); only used for multipart/form-data
	 *
	 * Returns the encoded body as a string, or NULL when both inputs are
	 * empty. For multipart/form-data a new boundary is generated and stored
	 * in $this->_mime_boundary.
	 */
	settype($formvars, "array");
	settype($formfiles, "array");
	$postdata = '';

	if (count($formvars) == 0 && count($formfiles) == 0)
		return;

	switch ($this->_submit_type) {
		case "application/x-www-form-urlencoded":
			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
					}
				} else
					$postdata .= urlencode($key)."=".urlencode($val)."&";
			}
			break;

		case "multipart/form-data":
			$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= "--".$this->_mime_boundary."\r\n";
						// concatenate the [] suffix: writing \[\] inside a
						// double-quoted string would emit the backslashes too
						$postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
						$postdata .= "$cur_val\r\n";
					}
				} else {
					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
					$postdata .= "$val\r\n";
				}
			}

			foreach ($formfiles as $field_name => $file_names) {
				settype($file_names, "array");
				foreach ($file_names as $file_name) {
					if (!is_readable($file_name)) continue;

					// file_get_contents copes with empty files, unlike
					// fread() with a zero length
					$file_content = file_get_contents($file_name);
					if ($file_content === false) continue;
					$base_name = basename($file_name);

					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
					$postdata .= "$file_content\r\n";
				}
			}
			$postdata .= "--".$this->_mime_boundary."--\r\n";
			break;
		// XOOPS2 Hack begin
		// Added on March 4, 2003 by onokazu@xoops.org
		case "text/xml":
		default:
			// any other submit type sends the first form variable verbatim
			$postdata = isset($formvars[0]) ? $formvars[0] : '';
			break;
		// XOOPS2 Hack end
	}

	return $postdata;
}
1279: }
1280:
1281: ?>
1282: