From 595eda2215d61dae5965a858fa92d36aa26bc02c Mon Sep 17 00:00:00 2001 From: sergiotarxz Date: Sun, 17 Oct 2021 19:41:52 +0200 Subject: [PATCH] Making some fixes and searching for divs. Next step is pcre2 integration. --- mangafox.c | 94 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/mangafox.c b/mangafox.c index 91d42d8..3747c24 100644 --- a/mangafox.c +++ b/mangafox.c @@ -15,40 +15,16 @@ xmlXPathObjectPtr get_nodes_xpath_expression ( const xmlDocPtr document, char *xpath); +char * +get_request (const char *url, gsize *size_response_text); void -retrieve_mangafox_title() { - SoupSession - *soup_session; - SoupMessage *msg; - GValue response = G_VALUE_INIT; - guint status; - gsize size_response_text; +retrieve_mangafox_title () { xmlDocPtr html_response; - - g_value_init (&response, G_TYPE_BYTES); - - soup_session = - soup_session_new(); - msg = - soup_message_new( - "GET", - mangafox_url - ); - status = - soup_session_send_message (soup_session, msg); - g_object_get_property( - G_OBJECT (msg), - "response-body-data", - &response); - const char *response_text = - g_bytes_get_data ( - (GBytes *) - g_value_peek_pointer - (&response), - &size_response_text - ); + gsize *size_response_text = malloc (sizeof (gsize)); + char *response_text = get_request (mangafox_url, + size_response_text); html_response = htmlReadMemory (response_text, - size_response_text, + *size_response_text, NULL, NULL, HTML_PARSE_RECOVER | HTML_PARSE_NODEFDTD @@ -56,39 +32,75 @@ retrieve_mangafox_title() { ); size_t manga_size; parse_main_mangafox_page (html_response, &manga_size); + free (response_text); +} + +char * +get_request (const char *url, gsize *size_response_text) { + SoupSession *soup_session; + SoupMessage *msg; + GValue response = G_VALUE_INIT; + guint status; + + *size_response_text = 0; + + g_value_init (&response, G_TYPE_BYTES); + + soup_session = soup_session_new (); + msg = soup_message_new ("GET", url); + status = soup_session_send_message (soup_session, msg); + g_object_get_property( + G_OBJECT (msg), + "response-body-data", + &response); + printf("%u\n", status); + const char *html_response = g_bytes_get_data ((GBytes *) + g_value_peek_pointer (&response), + size_response_text); + + char *return_value = g_strndup (html_response, *size_response_text); + + g_value_unset (&response); + g_object_unref (soup_session); + g_object_unref (msg); + + return return_value; } struct Manga * -parse_main_mangafox_page ( - const xmlDocPtr html_document, +parse_main_mangafox_page (const xmlDocPtr html_document, const size_t *size) { xmlIndentTreeOutput = 1; -// xmlDocDump (stderr, html_document); xmlXPathObjectPtr xpath_result = get_nodes_xpath_expression (html_document, - "//a"); + "//div[@class]"); if (!xpath_result) { fprintf(stderr, "Empty xpath result\n"); return NULL; } xmlNodeSetPtr node_set = xpath_result->nodesetval; - printf("%d\n", node_set->nodeNr); + if (!node_set) { + fprintf(stderr, "No match\n"); + return NULL; + } for (int i = 0; i < node_set->nodeNr; i++) { xmlNodePtr node = node_set->nodeTab[i]; - for (xmlAttr *attrs = node->properties; attrs->next; attrs=attrs->next) { - if (!xmlStrcmp(attrs->name, (const xmlChar *)"href")) { + for (xmlAttr *attrs = node->properties; attrs; attrs=attrs->next) { + if (!xmlStrcmp(attrs->name, (const xmlChar *)"class")) { + if (attrs->children + && attrs->children->content) { printf("%s\n", (const char *)attrs->children->content); + break; } } + } } } xmlXPathObjectPtr -get_nodes_xpath_expression ( - const xmlDocPtr document, - char *xpath) { +get_nodes_xpath_expression (const xmlDocPtr document, char *xpath) { xmlXPathContextPtr context; xmlXPathObjectPtr result;