/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at https://curl.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 * SPDX-License-Identifier: curl
 *
 ***************************************************************************/
/* <DESC>
 * Get a webpage, extract the title with libxml.
 * </DESC>

 Written by Lars Nilsson

 GNU C++ compile command line suggestion (edit paths accordingly):

 g++ -Wall -I/opt/curl/include -I/opt/libxml/include/libxml2 htmltitle.cpp \
 -o htmltitle -L/opt/curl/lib -L/opt/libxml/lib -lcurl -lxml2
*/
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>

#include <curl/curl.h>

#include <libxml/HTMLparser.h>

//
//  Case-insensitive string comparison
//
//  COMPARE(a, b) is true when the two C strings are equal ignoring case.
//  Windows spells the comparison function _stricmp, everything else
//  uses strcasecmp.
//

#if defined(_WIN32)
#  define COMPARE(a, b) (_stricmp((a), (b)) == 0)
#else
#  define COMPARE(a, b) (strcasecmp((a), (b)) == 0)
#endif

//
//  libxml callback context structure
//
//  One instance is handed to the SAX callbacks as their user data.
//
struct Context {
  // Set by StartElement and cleared by EndElement when the element
  // name matches "TITLE"; text is collected only while it is true.
  bool addTitle;

  // Text collected so far for the title.
  std::string title;

  // Start outside any title element with an empty title.
  Context()
    : addTitle(false),
      title()
  {
  }
};

//
//  libcurl variables for error strings and returned data
//

// Registered with CURLOPT_ERRORBUFFER in init() and printed in the
// failure messages of init() and main().
static char errorBuffer[CURL_ERROR_SIZE];
// Registered with CURLOPT_WRITEDATA in init(); writer() appends the
// received data here and main() hands it to parseHtml().
static std::string buffer;

//
//  libcurl write callback function
//
//  Appends the size * nmemb bytes at 'data' to the string 'writerData'.
//  Returns the number of bytes consumed. Returning a different count
//  than was offered (0 here) makes libcurl abort the transfer; that is
//  done when no destination string is given or the append fails.
//
static size_t writer(char *data, size_t size, size_t nmemb,
                     std::string *writerData)
{
  if(writerData == NULL)
    return 0;

  const size_t total = size * nmemb;

  // This function is called from libcurl, which is C code. A C++
  // exception (e.g. out of memory while growing the string) must not
  // unwind through those C frames, so turn it into a write error.
  try {
    writerData->append(data, total);
  }
  catch(...) {
    return 0;
  }

  return total;
}

//
//  Apply the transfer options to an already created handle
//
//  Sets the error buffer, the URL, redirect following, and the write
//  callback with its destination string. Prints a message and returns
//  false at the first option that cannot be set.
//
static bool setOptions(CURL *curl, const char *url)
{
  CURLcode res;

  res = curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errorBuffer);
  if(res != CURLE_OK) {
    fprintf(stderr, "Failed to set error buffer [%d]\n", (int)res);
    return false;
  }

  res = curl_easy_setopt(curl, CURLOPT_URL, url);
  if(res != CURLE_OK) {
    fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);
    return false;
  }

  res = curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  if(res != CURLE_OK) {
    fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);
    return false;
  }

  res = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writer);
  if(res != CURLE_OK) {
    fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);
    return false;
  }

  res = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);
  if(res != CURLE_OK) {
    fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);
    return false;
  }

  return true;
}

//
//  libcurl connection initialization
//
//  Creates an easy handle in 'curl' and configures it to fetch 'url'
//  into the global 'buffer'. Returns true on success. On failure a
//  message is printed, any handle that was created is released again
//  and 'curl' is left NULL, so the caller has nothing to clean up.
//
static bool init(CURL *&curl, const char *url)
{
  curl = curl_easy_init();

  if(!curl) {
    fprintf(stderr, "Failed to create CURL handle\n");
    return false;
  }

  if(!setOptions(curl, url)) {
    // Without this the half-configured handle would leak
    curl_easy_cleanup(curl);
    curl = NULL;
    return false;
  }

  return true;
}

//
//  libxml start element callback function
//
static void StartElement(void *voidContext,
                         const xmlChar *name,
                         const xmlChar **attributes)
{
  Context *context = static_cast<Context *>(voidContext);

  if(COMPARE(reinterpret_cast<const char *>(name), "TITLE")) {
    context->title = "";
    context->addTitle = true;
  }
  (void)attributes;
}

//
//  libxml end element callback function
//
static void EndElement(void *voidContext,
                       const xmlChar *name)
{
  Context *context = static_cast<Context *>(voidContext);

  if(COMPARE(reinterpret_cast<const char *>(name), "TITLE"))
    context->addTitle = false;
}

//
//  Text handling helper function
//
//  Appends 'length' bytes starting at 'chars' to the title, but only
//  while the context's addTitle flag is set.
//
static void handleCharacters(Context *context,
                             const xmlChar *chars,
                             int length)
{
  if(!context->addTitle)
    return;

  const char *text = reinterpret_cast<const char *>(chars);

  context->title.append(text, static_cast<unsigned long>(length));
}

//
//  libxml PCDATA callback function
//
//  Recovers the Context from the opaque user pointer and forwards the
//  text to handleCharacters().
//
static void Characters(void *voidContext,
                       const xmlChar *chars,
                       int length)
{
  handleCharacters(static_cast<Context *>(voidContext), chars, length);
}

//
//  libxml CDATA callback function
//
//  Recovers the Context from the opaque user pointer and forwards the
//  text to handleCharacters().
//
static void cdata(void *voidContext,
                  const xmlChar *chars,
                  int length)
{
  handleCharacters(static_cast<Context *>(voidContext), chars, length);
}

//
//  libxml SAX callback structure
//
//  The initializer is positional, so every entry must stay in the order
//  of the handler structure's members. Only four callbacks are
//  installed; every other slot is left NULL or 0. The member names in
//  the comments below are taken from libxml2's xmlSAXHandler layout.
//
static htmlSAXHandler saxHandler = {
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  StartElement,  // startElement
  EndElement,    // endElement
  NULL,
  Characters,    // characters
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  cdata,         // cdataBlock
  NULL,
  0,
  0,
  0,
  0,
  NULL
};

//
//  Parse given (assumed to be) HTML text and return the title
//
//  Runs 'html' through a libxml push parser that uses saxHandler and
//  stores the collected title text in 'title'. If the parser context
//  cannot be created, a message is printed and 'title' is left empty.
//
static void parseHtml(const std::string &html,
                      std::string &title)
{
  Context context;

  htmlParserCtxtPtr ctxt =
    htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",
                             XML_CHAR_ENCODING_NONE);

  if(!ctxt) {
    fprintf(stderr, "Failed to create HTML parser context\n");
    title.clear();
    return;
  }

  // htmlParseChunk() takes the length as an int, while the downloaded
  // body can be larger than INT_MAX. Feed it in pieces that fit so the
  // size is never truncated or turned negative by the conversion.
  const size_t maxChunk = static_cast<size_t>(INT_MAX);
  const char *data = html.c_str();
  size_t remaining = html.size();

  do {
    const size_t chunk = (remaining < maxChunk) ? remaining : maxChunk;

    htmlParseChunk(ctxt, data, static_cast<int>(chunk), 0);
    data += chunk;
    remaining -= chunk;
  } while(remaining);

  // An empty final chunk with the terminate flag ends the document
  htmlParseChunk(ctxt, "", 0, 1);

  htmlFreeParserCtxt(ctxt);

  title = context.title;
}

int main(int argc, char *argv[])
{
  CURL *curl = NULL;
  CURLcode res;
  std::string title;

  // Ensure one argument is given

  if(argc != 2) {
    fprintf(stderr, "Usage: %s <url>\n", argv[0]);
    return EXIT_FAILURE;
  }

  res = curl_global_init(CURL_GLOBAL_ALL);
  if(res)
    return (int)res;

  // Initialize CURL handle

  if(!init(curl, argv[1])) {
    fprintf(stderr, "Handle initialization failed\n");
    curl_global_cleanup();
    return EXIT_FAILURE;
  }

  // Retrieve content for the URL

  res = curl_easy_perform(curl);
  curl_easy_cleanup(curl);

  if(res != CURLE_OK) {
    fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);
    return EXIT_FAILURE;
  }

  // Parse the (assumed) HTML code
  parseHtml(buffer, title);

  // Display the extracted title
  printf("Title: %s\n", title.c_str());

  return (int)res;
}
