/* wget.c - Simple downloader to get the resource file in HTTP server
 *
 * Copyright 2016 Lipi C.H. Lee <lipisoft@gmail.com>
 *

USE_WGET(NEWTOY(wget, "(no-check-certificate)O:", TOYFLAG_USR|TOYFLAG_BIN))

config WGET
  bool "wget"
  default n
  help
    usage: wget -O filename URL
    -O filename: specify output filename
    URL: uniform resource location, FTP/HTTP only, not HTTPS

    examples:
      wget -O index.html http://www.example.com
      wget -O sample.jpg ftp://ftp.example.com:21/sample.jpg
*/

#define FOR_wget
#include "toys.h"

GLOBALS(
  char *filename;
)

// extract hostname from url
static unsigned get_hn(const char *url, char *hostname) {
  unsigned i;

  for (i = 0; url[i] != '\0' && url[i] != ':' && url[i] != '/'; i++) {
    if(i >= 1024) error_exit("too long hostname in URL");
    hostname[i] = url[i];
  }
  hostname[i] = '\0';

  return i;
}

// extract port number
static unsigned get_port(const char *url, char *port, unsigned url_i) {
  unsigned i;

  for (i = 0; url[i] != '\0' && url[i] != '/'; i++, url_i++) {
    if('0' <= url[i] && url[i] <= '9') port[i] = url[i];
    else error_exit("wrong decimal port number");
  }
  if(i <= 6) port[i] = '\0';
  else error_exit("too long port number");

  return url_i;
}

// get http infos in URL
static void get_info(const char *url, char* hostname, char *port, char *path) {
  unsigned i = 7, len;
  char ftp = !strncmp(url, "ftp://", 6);

  if (ftp) i--;
  else if (strncmp(url, "http://", i)) error_exit("only FTP/HTTP support");
  len = get_hn(url+i, hostname);
  i += len;

  // get port if exists
  if (url[i] == ':') {
    i++;
    i = get_port(url+i, port, i);
  } else strcpy(port, "80");

  // get uri in URL
  if (url[i] == '\0') strcpy(path, "/");
  else if (url[i] == '/') {
    if (strlen(url+i) < 1024) strcpy(path, url+i);
    else error_exit("too long path in URL");
  } else error_exit("wrong URL");

  if (ftp) xexec((char *[]){"ftpget", hostname, TT.filename, path, 0});
}

// connect to any IPv4 or IPv6 server
static int conn_svr(const char *hostname, const char *port) {
  struct addrinfo hints, *result, *rp;
  int sock;

  memset(&hints, 0, sizeof(struct addrinfo));
  hints.ai_family = AF_UNSPEC;
  hints.ai_socktype = SOCK_STREAM;
  hints.ai_flags = 0;
  hints.ai_protocol = 0;

  if ((errno = getaddrinfo(hostname, port, &hints, &result)))
    error_exit("getaddrinfo: %s", gai_strerror(errno));

  // try all address list(IPv4 or IPv6) until success
  for (rp = result; rp; rp = rp->ai_next) {
    if ((sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol))
        == -1) {
      perror_msg("socket error");
      continue;
    }
    if (connect(sock, rp->ai_addr, rp->ai_addrlen) != -1)
      break; // succeed in connecting to any server IP
    else perror_msg("connect error");
    close(sock);
  }
  freeaddrinfo(result);
  if(!rp) error_exit("can't connect");

  return sock;
}

// make HTTP request header field
static void mk_fld(char *name, char *value) {
  strcat(toybuf, name);
  strcat(toybuf, ": ");
  strcat(toybuf, value);
  strcat(toybuf, "\r\n");
}

// get http response body starting address and its length
static char *get_body(ssize_t len, ssize_t *body_len) {
  int i;

  for (i = 0; i < len-4; i++)
    if (!strncmp(toybuf+i, "\r\n\r\n", 4)) break;

  *body_len = len - i - 4;
  return toybuf+i+4;
}

void wget_main(void)
{
  int sock, redirects = 10;
  FILE *fp;
  ssize_t len, body_len;
  char *body, *result, *rc, *r_str, *redir_loc = 0;
  char ua[18] = "toybox wget",  hostname[1024], port[6], path[1024];

  // TODO extract filename to be saved from URL
  if (!(toys.optflags & FLAG_O)) help_exit("no filename");
  if (fopen(TT.filename, "r")) error_exit("'%s' already exists", TT.filename);

  if(!toys.optargs[0]) help_exit("no URL");
  get_info(toys.optargs[0], hostname, port, path);

  sprintf(ua+11, "/%s", TOYBOX_VERSION);
  for (;; redirects--) {
    sock = conn_svr(hostname, port);
    // compose HTTP request
    sprintf(toybuf, "GET %s HTTP/1.1\r\n", path);
    mk_fld("Host", hostname);
    mk_fld("User-Agent", ua);
    mk_fld("Connection", "close");
    strcat(toybuf, "\r\n");

    // send the HTTP request
    len = strlen(toybuf);
    if (write(sock, toybuf, len) != len) perror_exit("write error");

    // read HTTP response
    if ((len = read(sock, toybuf, 4096)) == -1) perror_exit("read error");
    if (!strstr(toybuf, "\r\n\r\n")) error_exit("too long HTTP response");
    body = get_body(len, &body_len);
    redir_loc = strstr(toybuf, "Location: ");
    result = strtok(toybuf, "\r");
    strtok(result, " ");
    rc = strtok(NULL, " ");
    r_str = strtok(NULL, " ");

    // HTTP res code check
    if (!strcmp(rc, "301") || !strcmp(rc, "302")) {
      char* eol = 0;
      if ((eol = strchr(redir_loc, '\r')) > 0) *eol = 0;
      else if (redir_loc) error_exit("Could not parse redirect URL");
      if (redirects < 0) error_exit("Too many redirects");

      printf("Redirection: %s %s \n", rc, r_str);
      printf("%s \n", redir_loc);
      redir_loc = redir_loc+strlen("Location: ");
      close(sock);
      get_info(redir_loc, hostname, port, path);
    } else if (!strcmp(rc, "200")) break;
    else error_exit("res: %s(%s)", rc, r_str);
  }


  if (!(fp = fopen(TT.filename, "w"))) perror_exit("fopen error");
  if (fwrite(body, 1, body_len, fp) != body_len)
    error_exit("fwrite error");
  while ((len = read(sock, toybuf, 4096)) > 0)
    if (fwrite(toybuf, 1, len, fp) != len)
      error_exit("fwrite error");
  if (fclose(fp) == EOF) perror_exit("fclose error");
}