<?php
/*
   Class: wgetHandler

   Implements an interface to run downloads with wget, intended to be used in shell scripts but also usable in a web environment.

   Author:
    (c) 2012, Tobias Baeumer <TobiasBaeumer@gmail.com>

   License:
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

class wgetHandler {
    /*
      String: strPathTowget

      Path to the wget binary to use. Can be relative, full, or just "wget" if the binary is in PATH.

      Notes:
        This value is inserted into the shell command verbatim (not escaped), so it must come from trusted configuration.

      Default:
        string - /usr/bin/wget
    */
    public $strPathTowget = '/usr/bin/wget';

    /*
      String: strParams

      Command line params/options to use when launching wget.

      Notes:
        - Don't add '--referer' or '--user-agent' here, set them through strReferer (see <doDownload>) and <strUserAgent> instead, else your values would be overwritten by those.
        - When <arrCookies> is not empty, <doDownload> appends its own '--load-cookies' option pointing at the cookies.txt inside <strDownloadPath>.
        - This value is inserted into the shell command verbatim (not escaped), so it must come from trusted configuration.

      Default:
        string - -e robots=off --follow-tags=meta --follow-ftp --max-redirect 5 --span-hosts --cookies=on --load-cookies=cookies.txt
    */
    public $strParams = '-e robots=off --follow-tags=meta --follow-ftp --max-redirect 5 --span-hosts --cookies=on --load-cookies=cookies.txt';

    /*
      String: strUserAgent

      HTTP User-Agent to send. If set to null (or an empty string) this param will be omitted and wget will send its own User-Agent.

      Default:
        const - null
    */
    public $strUserAgent = null;

    /*
      Array: arrErrorDescriptions

      Descriptions of wget exit codes, keyed by exit code.
      These strings are only used to build the return value of <doDownload> on failure and can be translated.

      Notes:
        Same as for <intReturnValue>: Don't trust this value when using `intLiveMode` for <doDownload>.

      Default:
        array - English descriptions, see wget manpage for details.
    */
    public $arrErrorDescriptions = array(
                                            '1' => 'Generic error occurred',
                                            '2' => 'Parse error - for instance, when parsing command-line options, the .wgetrc or .netrc ',
                                            '3' => 'File I/O error',
                                            '4' => 'Network failure',
                                            '5' => 'SSL verification failure',
                                            '6' => 'Username/password authentication failure',
                                            '7' => 'Protocol errors',
                                            '8' => 'Server issued an error response'
                                        );

    /*
      Array: arrCookies

      Cookies to send with download requests. Array keys will be used as cookie names, values as cookie values.

      Default:
        array - Empty array
    */
    public $arrCookies = array();

    /*
      Integer: intReturnValue

      Receives wget's exit code when <doDownload> has finished.

      Notes:
        Don't trust this value when using `intLiveMode` for <doDownload>: wget is then started in the background,
        so this is the exit code of the launching shell, not of wget.

      Default:
        integer - 99 (as placeholder for init, no meaning)
    */
    public $intReturnValue = 99;

    /*
      String: strDownloadPath

      Path (full or relative) to target directory, without trailing slash, for downloaded files.
      Must be writeable of course, so maybe a chmod is needed.

      Notes:
        Will also be used for cookies.txt (used for passing cookies to wget) and .wgetout (used for intLiveMode in <doDownload>)

      Default:
        string - ./downloads
    */
    public $strDownloadPath = './downloads';

    /*
      Array: arrOutputLines

      Will be filled with stdout (and stderr, since we use output redirection) from wget. Used by <_parseOutput>.

      Notes:
        This is also available when using intLiveMode in <doDownload>; in that case it is read back from the .wgetout file.

      Default:
        array - Empty array

      See Also:
        <_parseOutput>
    */
    private $arrOutputLines = array();

    /*
      Object: objDownloadResult

      Receives the result of the last successful <doDownload> call (see <_parseOutput> for its shape).
      Becomes false if wget reported that the server file was not newer than the local one.

      Default:
        object - AvgSpeed "0 KB/s", Time "0s", Size "0,00 KB" (set by the constructor)

      See Also:
        <_parseOutput>, <arrOutputLines>
    */
    public $objDownloadResult;

    /*
      Array: arrIndicatorChars

      Each element contains one char, used in order of appearance in array for intLiveMode=3 param in <doDownload>.

      Default:
        array - |, /, -, \, |, /, -, \

      See Also:
        <doDownload>
    */
    public $arrIndicatorChars = array('|', '/', '-', '\\', '|', '/', '-', '\\');


    /*
      Constructor: __construct()

      Initialize <objDownloadResult> with zeroed placeholder values.
    */
    public function __construct() {
        $this->objDownloadResult = (object) array("AvgSpeed" => "0 KB/s", "Time" => "0s", "Size" => "0,00 KB");
    }


    /*
      Destructor: __destruct()

      Just delete the cookies.txt file, if it exists.
    */
    public function __destruct() {
        // Note: no unset($this) here - it is a fatal compile error since PHP 7.1 and never had any effect.
        $this->_deleteFileIfExists($this->_cookieFilePath());
    }

    /*
      Function: _cookieFilePath

      Returns:
        string - path of the cookies.txt file inside <strDownloadPath>
    */
    private function _cookieFilePath() {
        return $this->strDownloadPath."/cookies.txt";
    }

    /*
      Function: _outputFilePath

      Returns:
        string - path of the .wgetout file (wget output in live mode) inside <strDownloadPath>
    */
    private function _outputFilePath() {
        return $this->strDownloadPath.'/.wgetout';
    }

    /*
      Function: _deleteFileIfExists

      Deletes a regular file, silently doing nothing if the path is empty, missing or not a regular file.

      Parameters:
        string strPath - file to delete
    */
    private function _deleteFileIfExists($strPath) {
        if($strPath !== '' and is_file($strPath)) {
            unlink($strPath);
        }
    }

    /*
      Function: _saveCookieFile

      Save cookies from <arrCookies> to cookies.txt (Netscape cookie file format, TAB separated fields).
      A cookies.txt left over from a previous call is always removed first.

      Parameters:
        string strDomainCookieScope - the domain/host for which these cookies are valid

      Returns:
        boolean - true, if save was successful (or there were no cookies to save)
            or
        boolean - false, if an error occurred (most likely because of permission problems)

      See Also:
        <arrCookies>
    */
    private function _saveCookieFile($strDomainCookieScope) {
        $cookieFile = $this->_cookieFilePath();
        $this->_deleteFileIfExists($cookieFile);

        if(!is_array($this->arrCookies) or count($this->arrCookies) === 0) {
            return true;
        }

        // Field order: domain, include-subdomains flag, path, secure flag, expiry (0 = session cookie), name, value.
        // The fields MUST be separated by tabs, wget skips lines that are separated by spaces.
        $contents = "";
        foreach($this->arrCookies as $name => $value) {
            $contents .= implode("\t", array($strDomainCookieScope, 'TRUE', '/', 'FALSE', '0', $name, $value))."\n";
        }

        return file_put_contents($cookieFile, $contents) !== false;
    }

    /*
      Function: _lineFromEnd

      Parameters:
        array arrLines - list of output lines
        integer intOffset - 1 for the last line, 2 for the one before it, ...

      Returns:
        string - the requested line, or an empty string if the list is too short
    */
    private function _lineFromEnd($arrLines, $intOffset) {
        $index = count($arrLines) - $intOffset;
        return ($index >= 0 and isset($arrLines[$index])) ? $arrLines[$index] : '';
    }

    /*
      Function: _formatSize

      Parameters:
        float bytes - size in bytes

      Returns:
        string - size with two decimals (decimal comma) and trailing unit: GB above 1,000,000,000 bytes,
                 MB above 10,000,000 bytes, KB otherwise
    */
    private function _formatSize($bytes) {
        if($bytes > 1000000000) return number_format($bytes / 1024 / 1024 / 1024, 2, ",", ".")." GB";
        if($bytes > 10000000) return number_format($bytes / 1024 / 1024, 2, ",", ".")." MB";
        return number_format($bytes / 1024, 2, ",", ".")." KB";
    }

    /*
      Function: _parseOutput

      Takes the output lines of wget from <arrOutputLines> and gets average download speed, time needed and file size from it.
      Values that can't be found in the output are returned as empty strings.

      Returns:
        object
            - string - AvgSpeed,
            - string - Size (depending on filesize it will be either in KB, MB or GB, with trailing unit),
            - string - Time

            or

        boolean - false, if server file didn't change

      See Also:
        <arrOutputLines>
    */
    private function _parseOutput() {
        $lines = is_array($this->arrOutputLines) ? $this->arrOutputLines : array();

        // wget reports an unchanged server file on the 6th output line
        if(isset($lines[5]) and strpos($lines[5], "Server file no newer than local file") !== false) {
            return false;
        }

        // Avg. download speed: in the "(1,23 MB/s) - 'file' saved" line, which is the 2nd or 3rd line from the end.
        // Both "," and "." are accepted as decimal separator, since that depends on the numeric locale wget runs with.
        $speed = '';
        foreach(array(2, 3) as $offset) {
            if(preg_match('| \(([0-9.,?]+) ([a-zA-Z/]+)\) |', $this->_lineFromEnd($lines, $offset), $matchesSpd) === 1) {
                $speed = $matchesSpd[1].' '.$matchesSpd[2];
                break;
            }
        }

        // Time needed: the part after "=" at the end of the last progress line (4th or 5th line from the end)
        $timeParts = explode("=", $this->_lineFromEnd($lines, 4));
        if(count($timeParts) != 2) $timeParts = explode("=", $this->_lineFromEnd($lines, 5));
        $time = isset($timeParts[1]) ? $timeParts[1] : '';

        // File size: position of the "Length" line varies (redirections etc.), so search for the first one
        $size = '';
        foreach($lines as $outLine) {
            if(substr($outLine, 0, 6) === "Length") {
                // "Length: unspecified" has no byte count and is reported as 0 bytes
                $bytes = (preg_match('|Length: ([0-9]+) |', $outLine, $matchesSize) === 1) ? (float) $matchesSize[1] : 0.0;
                $size = $this->_formatSize($bytes);
                break;
            }
        }

        return (object) array(
                              "AvgSpeed" => trim($speed),
                              "Time" => trim($time),
                              "Size" => trim($size)
                        );
    }

    /*
      Function: _localTargetPath

      Parameters:
        string strURL - URL that will be downloaded
        string strTargetFile - target file as given to <doDownload> (may be empty)

      Returns:
        string - the path wget will write to: <strDownloadPath>/strTargetFile if a target file is given,
                 else the last path segment of the URL (relative to the current working directory)
    */
    private function _localTargetPath($strURL, $strTargetFile) {
        if((string) $strTargetFile !== '') {
            return $this->strDownloadPath.'/'.$strTargetFile;
        }
        $segments = explode('/', (string) $strURL);
        return $segments[count($segments) - 1];
    }

    /*
      Function: _buildCommand

      Builds the shell command line for wget. All values coming from <doDownload> parameters and <strUserAgent>
      are escaped with escapeshellarg(); <strPathTowget> and <strParams> are used verbatim.

      Parameters:
        string strURL - URL to download
        string strTargetFile - target file name, empty to let wget choose the name
        string strReferer - HTTP referer to send
        string strOutRedir - already escaped output redirection to append to the command

      Returns:
        string - the complete command line
    */
    private function _buildCommand($strURL, $strTargetFile, $strReferer, $strOutRedir) {
        // LC_MESSAGES=POSIX: _parseOutput relies on wgets untranslated messages
        $parts = array("export LC_MESSAGES=POSIX &&", $this->strPathTowget, $this->strParams);

        // The cookie file lives in strDownloadPath, while a relative --load-cookies in strParams is resolved
        // against the current working directory - so point wget to the file we have actually written.
        if(is_array($this->arrCookies) and count($this->arrCookies) > 0) {
            $parts[] = "--load-cookies=".escapeshellarg($this->_cookieFilePath());
        }

        $parts[] = "--referer=".escapeshellarg((string) $strReferer);

        if((string) $this->strUserAgent !== '') {
            $parts[] = "--user-agent=".escapeshellarg((string) $this->strUserAgent);
        }

        if((string) $strTargetFile !== '') {
            $parts[] = "-O ".escapeshellarg($this->_localTargetPath($strURL, $strTargetFile));
        }

        $parts[] = escapeshellarg((string) $strURL);

        return implode(" ", $parts).$strOutRedir;
    }

    /*
      Function: _findProgressPercent

      Searches the last three lines of wgets output for a progress percentage.

      Parameters:
        array arrLines - lines read from the .wgetout file

      Returns:
        string - the percentage (digits only)
            or
        null - if none of the last three lines contains one
    */
    private function _findProgressPercent($arrLines) {
        for($offset = 1; $offset <= 3; $offset++) {
            if(preg_match('|(?:.+) ([0-9]{1,3})% (?:.+)|is', $this->_lineFromEnd($arrLines, $offset), $matches) === 1) {
                return $matches[1];
            }
        }
        return null;
    }

    /*
      Function: _printLiveProgress

      Prints a live status until no wget process is running anymore (checked with "pgrep wget", so any
      wget process on the machine counts).

      Parameters:
        integer intLiveMode - 1, 2 or 3, see Notes of <doDownload>
    */
    private function _printLiveProgress($intLiveMode) {
        $outFile = $this->_outputFilePath();
        $indicators = is_array($this->arrIndicatorChars) ? array_values($this->arrIndicatorChars) : array();
        $lastPrinted = '';
        $charIndex = 0;

        $wgetPIDs = array();
        exec("pgrep wget", $wgetPIDs);
        while(!empty($wgetPIDs)) {
            // The background shell may not have created the file yet
            $outLines = is_file($outFile) ? file($outFile) : false;
            if($outLines !== false and count($outLines) > 1) {
                $percent = $this->_findProgressPercent($outLines);
                $changed = ($percent !== null and $percent !== $lastPrinted);

                // Modes 1 and 2 only print on progress changes, mode 3 animates on every poll
                if($changed or $intLiveMode == 3) {
                    print "\033[0m";
                    switch($intLiveMode) {
                        case 1: // Mode 1 - Simply print dots each time the percentage/progress changed
                            print '.';
                            break;
                        case 2: // Mode 2 - Print percentage and update it, setting the cursor back with ansi codes (for shell scripts)
                            if($lastPrinted == '') print "    ";
                            print "\033[4D";
                            print str_pad($percent, 3, ' ', STR_PAD_LEFT).'%';
                            break;
                        case 3: // Mode 3 - Print 1-char-based animated indicator
                            if($lastPrinted == '') {
                                print ' ';
                                $lastPrinted = '!'; // marks "something was printed" until a percentage is known
                            }
                            print "\033[1D";
                            if(count($indicators) > 0) {
                                print $indicators[$charIndex];
                                $charIndex = ($charIndex + 1) % count($indicators);
                            }
                            break;
                        default: break;
                    }
                    if($percent !== null) $lastPrinted = $percent;
                }
            }

            if(ob_get_level() > 0) ob_flush();
            flush();

            $wgetPIDs = array(); // exec() appends to the array, so reset it before polling again
            exec("pgrep wget", $wgetPIDs);
            usleep(250000); // 0.25s - sleep() only takes whole seconds
        }

        if($intLiveMode == 1 and $lastPrinted != '') print " ";
        if($intLiveMode == 2 and $lastPrinted != '') print "\033[5D ";
        if($intLiveMode == 3 and $lastPrinted != '') print "\033[2D ";
    }

    /*
      Function: doDownload

      Downloads a given url

      Parameters:
        string strURL - URL to download
        string strTargetFile - target file for download, relative to <strDownloadPath> (defaults to server given name, saved in the current working directory)
        string strReferer - HTTP referer to send (defaults to none)
        integer intLiveMode - If greater than 0, will run wget in background and print live status for which this value is taken as mode switch. See Notes.
        boolean bDontDeleteTargetFile - defaults to false, will skip deletion of an already existing target file if set to true.

      Returns:
        boolean - true, if download was successful

            or

        string - "<exit code>::<description>" if wget failed

            or

        boolean - false, if the cookie file could not be written (wget is not started then)

      Notes:
        intLiveMode can be 1, 2 or 3. These modes have the following meaning:
            - 1: Print dots each time the progress percentage in wgets output changes, followed by a space when finished.
            - 2: Print percentage value, update it by moving the cursor back with ANSI codes. This mode is for shell scripts.
            - 3: Print a spinning 1-Char length animation on cursor position, again using ANSI codes for positioning. You can set the characters to use in <arrIndicatorChars>.

      See Also:
        <_saveCookieFile>, <strPathTowget>, <strParams>, <arrOutputLines>, <intReturnValue>, <objDownloadResult>, <_parseOutput>, <arrErrorDescriptions>, <arrIndicatorChars>
    */
    public function doDownload($strURL, $strTargetFile="", $strReferer="", $intLiveMode=0, $bDontDeleteTargetFile=false) {
        // If we can't write the cookie file, we most likely also can't write the target file so fail here.
        if(!$this->_saveCookieFile(parse_url($strURL, PHP_URL_HOST))) return false;
        $this->arrOutputLines = array();

        $intLiveMode = (int) $intLiveMode;
        $liveMode = $intLiveMode > 0;
        $outFile = $this->_outputFilePath();

        if($liveMode) {
            // Detach wget and let it write its output to a file we can poll
            $this->_deleteFileIfExists($outFile);
            $outRedir = ' >> '.escapeshellarg($outFile).' 2>&1 &';
        } else {
            $outRedir = " 2>&1";
        }

        // Delete the old download, else wget would save to "<name>.1" or append to the -O file
        if(!$bDontDeleteTargetFile) {
            $this->_deleteFileIfExists($this->_localTargetPath($strURL, $strTargetFile));
        }

        exec($this->_buildCommand($strURL, $strTargetFile, $strReferer, $outRedir), $this->arrOutputLines, $this->intReturnValue);

        if($liveMode) {
            $this->_printLiveProgress($intLiveMode);
        }

        if($this->intReturnValue != 0) {
            $code = $this->intReturnValue;
            $description = isset($this->arrErrorDescriptions[$code]) ? $this->arrErrorDescriptions[$code] : 'Unknown error';
            return $code."::".$description;
        }

        // Download was successful, fill objDownloadResult with results
        if($liveMode and is_file($outFile)) {
            file_put_contents($outFile, "\n", FILE_APPEND); // make sure file() doesn't miss an unterminated last line
            $outLines = file($outFile);
            $this->arrOutputLines = ($outLines === false) ? array() : $outLines;
            unlink($outFile);
        }
        $this->objDownloadResult = $this->_parseOutput();
        return true;
    }
}
?>
