From 923282f1d1e08e180aca6c496b339e63f41130ff Mon Sep 17 00:00:00 2001 From: sergiotarxz Date: Sat, 30 Oct 2021 18:59:54 +0200 Subject: [PATCH] Splitting manga.c contents into xml.c and soup.c. It is still needed to refactor string manipulation and delete manga.c and manga.h (The root one, MgManga is ok.) --- Makefile | 2 +- include/manga.h | 32 +------- include/openmg/manga.h | 6 -- include/openmg/util/soup.h | 15 ++++ include/openmg/util/xml.h | 37 +++++++++ manga.c | 164 ++----------------------------------- src/backend/readmng.c | 31 ++++--- src/manga.c | 2 +- src/util/soup.c | 67 +++++++++++++++ src/util/xml.c | 140 +++++++++++++++++++++++++++++++ src/view/list_view_manga.c | 6 +- 11 files changed, 296 insertions(+), 206 deletions(-) create mode 100644 include/openmg/util/soup.h create mode 100644 include/openmg/util/xml.h create mode 100644 src/util/soup.c create mode 100644 src/util/xml.c diff --git a/Makefile b/Makefile index 08faeda..48899cf 100644 --- a/Makefile +++ b/Makefile @@ -6,4 +6,4 @@ LDFLAGS := $(shell pkg-config --libs ${LIBS}) CC_COMMAND := ${CC} ${INCDIR} ${CFLAGS} all: build build: - ${CC_COMMAND} src/view/list_view_manga.c src/view/main_view.c src/manga.c src/backend/readmng.c manga.c main.c -o main ${LDFLAGS} -ggdb + ${CC_COMMAND} src/util/xml.c src/util/soup.c src/view/list_view_manga.c src/view/main_view.c src/manga.c src/backend/readmng.c manga.c main.c -o main ${LDFLAGS} -ggdb diff --git a/include/manga.h b/include/manga.h index 20064d0..ff8a9a7 100644 --- a/include/manga.h +++ b/include/manga.h @@ -7,8 +7,6 @@ #include #endif -#define XML_COPY_NODE_RECURSIVE 2 | 1 - struct Manga { char *title; char *image_url; @@ -24,38 +22,14 @@ struct String { size_t size; }; -char * -get_request (const char *url, gsize *size_response_text); -xmlNodePtr * -find_class (xmlNodePtr node, char *class, size_t *len, xmlNodePtr *nodes, - int return_on_first); -void -print_debug_nodes (const xmlDocPtr html_document, - xmlNodePtr *nodes, size_t nodes_len); -char * -get_attr (xmlNodePtr const node, const char *attr_name); -void -copy_substring(const char *origin, char *dest, size_t dest_len, size_t start, - size_t len); -int -has_class (const char *class_attribute, - const char *class_to_check); struct SplittedString * split(char *re_str, size_t re_str_size, const char *subject, size_t subject_size); char * alloc_string(size_t len); void splitted_string_free (struct SplittedString *splitted_string); -void -iterate_string_to_split(struct SplittedString *splitted_string, - pcre2_code *re, int *will_break, const char *subject, - size_t subject_size, size_t *start_pos, size_t *offset); -xmlXPathObjectPtr -get_nodes_xpath_expression (const xmlDocPtr document, char *xpath); -xmlNodePtr * -loop_search_class (const xmlNodePtr node, xmlNodePtr *nodes, - const char * class, size_t *len); -char * -copy_binary_data (const char *input, size_t size); char * match_1 (char *re_str, char *subject); +void +copy_substring(const char *origin, char *dest, size_t dest_len, size_t start, + size_t len); diff --git a/include/openmg/manga.h b/include/openmg/manga.h index 519973f..5942bb8 100644 --- a/include/openmg/manga.h +++ b/include/openmg/manga.h @@ -3,15 +3,9 @@ G_BEGIN_DECLS; -/* - * Type declaration - */ #define MG_TYPE_MANGA mg_manga_get_type() G_DECLARE_FINAL_TYPE (MgManga, mg_manga, MG, MANGA, GObject) -/* - * Method definitions. - */ char *mg_manga_get_image_url(MgManga *mg_manga); char *mg_manga_get_title(MgManga *mg_manga); char *mg_manga_get_id(MgManga *mg_manga); diff --git a/include/openmg/util/soup.h b/include/openmg/util/soup.h new file mode 100644 index 0000000..4d503c4 --- /dev/null +++ b/include/openmg/util/soup.h @@ -0,0 +1,15 @@ +#pragma once +#include + +G_BEGIN_DECLS + +#define MG_TYPE_UTIL_SOUP mg_util_soup_get_type() +G_DECLARE_FINAL_TYPE (MgUtilSoup, mg_util_soup, MG, UTIL_SOUP, GObject) + +MgUtilSoup * +mg_util_soup_new (); + +char * +mg_util_soup_get_request (MgUtilSoup *self, const char *const url, gsize *size_response_text); + +G_END_DECLS diff --git a/include/openmg/util/xml.h b/include/openmg/util/xml.h new file mode 100644 index 0000000..424fdc1 --- /dev/null +++ b/include/openmg/util/xml.h @@ -0,0 +1,37 @@ +#pragma once + +#include + +#include +#include + +#include //Nasty file + +G_BEGIN_DECLS; + +#define XML_COPY_NODE_RECURSIVE 2 | 1 +#define MG_TYPE_UTIL_XML mg_util_xml_get_type() + +G_DECLARE_FINAL_TYPE (MgUtilXML, mg_util_xml, MG, UTIL_XML, GObject) + +MgUtilXML * +mg_util_xml_new ();; + +char * +mg_util_xml_get_attr (MgUtilXML *self, xmlNodePtr const node, const char *attr_name); + +xmlNodePtr * +mg_util_xml_find_class (MgUtilXML *self, xmlNodePtr node, char *class, + size_t *len, xmlNodePtr *nodes, int return_on_first); + +xmlNodePtr * +mg_util_xml_loop_search_class (MgUtilXML *self, const xmlNodePtr node, xmlNodePtr *nodes, + const char * class, size_t *len); +xmlXPathObjectPtr +mg_util_xml_get_nodes_xpath_expression (MgUtilXML *self, + const xmlDocPtr document, char *xpath); +int +mg_util_xml_has_class (MgUtilXML *self, + const char *class_attribute, const char *class_to_check); + +G_END_DECLS diff --git a/manga.c b/manga.c index 36daf50..317a04b 100644 --- a/manga.c +++ b/manga.c @@ -1,6 +1,4 @@ #include -#include -#include #ifndef PCRE2_CODE_UNIT_WIDTH #define PCRE2_CODE_UNIT_WIDTH 8 #include @@ -8,111 +6,11 @@ #include -char * -get_request (const char *url, gsize *size_response_text) { - SoupSession *soup_session; - SoupMessage *msg; - GValue response = G_VALUE_INIT; - guint status; - - *size_response_text = 0; - - g_value_init (&response, G_TYPE_BYTES); - - soup_session = soup_session_new (); - msg = soup_message_new ("GET", url); - status = soup_session_send_message (soup_session, msg); - g_object_get_property( - G_OBJECT (msg), - "response-body-data", - &response); - - const char *html_response = g_bytes_get_data ((GBytes *) - g_value_peek_pointer (&response), - size_response_text); - - char *return_value = copy_binary_data(html_response, *size_response_text); - - g_value_unset (&response); - g_object_unref (soup_session); - g_object_unref (msg); - - return return_value; -} - -char * -copy_binary_data (const char *input, size_t size) { - char *response = NULL; - if (size) { - response = g_realloc(response, sizeof *response * size); - for (size_t i = 0; ichildren; child; child=child->next) { - char *attr = get_attr (child, "class"); - if (attr && has_class (attr, class)) { - (*len)++; - nodes = g_realloc (nodes, sizeof *nodes * *len); - nodes[*len-1] = child; - if (return_on_first) { - return nodes; - } - } - if (node->children) { - xmlNodePtr child = node->children; - for (;child;child=child->next) { - nodes = find_class (child, class, len, nodes, - return_on_first); - if (*len) { - return nodes; - } - } - } - } - return nodes; -} - -void -print_debug_nodes (const xmlDocPtr html_document, - xmlNodePtr *nodes, size_t nodes_len) { - xmlBufferPtr buffer = xmlBufferCreate (); - for (int i = 0; i < nodes_len; i++) { - xmlNodeDump (buffer, html_document, nodes[i], - 0, 1); - } - xmlBufferDump (stdout, buffer); - xmlBufferFree (buffer); -} - -char * -get_attr (xmlNodePtr const node, const char *attr_name) { - char *return_value = NULL; - if (!node) { - return NULL; - } - for (xmlAttr *attr = node->properties; attr; attr=attr->next) { - if (!xmlStrcmp(attr->name, (const xmlChar *) attr_name) - && attr->children && attr->children->content) { - if (!attr->children->content) continue; - size_t content_len = strlen((char *) - attr->children->content); - return_value = alloc_string(content_len); - copy_substring ((char *) attr->children->content, return_value, - content_len, - 0, - content_len); - break; - } - } - return return_value; -} +// TODO: Split this file and delete it. +static void +iterate_string_to_split(struct SplittedString *splitted_string, + pcre2_code *re, int *will_break, const char *subject, + size_t subject_size, size_t *start_pos, size_t *offset); void copy_substring(const char *origin, char *dest, size_t dest_len, size_t start, @@ -128,27 +26,6 @@ copy_substring(const char *origin, char *dest, size_t dest_len, size_t start, } dest[len] = '\0'; } - -int -has_class (const char *class_attribute, - const char *class_to_check) { - char *re = "\\s+"; - struct SplittedString *classes; - int return_value = 0; - classes = split(re, strlen(re), class_attribute, - strlen(class_attribute)); - for (int i = 0; in_strings; i++) { - if (strcmp(classes->substrings[i].content, class_to_check) == 0) { - return_value = 1; - goto cleanup_has_class; - } - } - -cleanup_has_class: - splitted_string_free (classes); - return return_value; -} - struct SplittedString * split(char *re_str, size_t re_str_size, const char *subject, size_t subject_size) { pcre2_code_8 *re; @@ -195,7 +72,7 @@ splitted_string_free (struct SplittedString *splitted_string) { g_free (splitted_string); } -void +static void iterate_string_to_split(struct SplittedString *splitted_string, pcre2_code *re, int *will_break, const char *subject, size_t subject_size, size_t *start_pos, size_t *offset) { pcre2_match_data_8 *match_data; @@ -245,35 +122,6 @@ cleanup_iterate_string_to_split: pcre2_match_data_free (match_data); } -xmlXPathObjectPtr -get_nodes_xpath_expression (const xmlDocPtr document, char *xpath) { - xmlXPathContextPtr context; - xmlXPathObjectPtr result; - - context = xmlXPathNewContext (document); - result = xmlXPathEvalExpression ((const xmlChar *)xpath, context); - - xmlXPathFreeContext (context); - - return result; -} - -xmlNodePtr * -loop_search_class (const xmlNodePtr node, xmlNodePtr *nodes, - const char * class, size_t *len) { - char *content = get_attr (node, "class"); - if (!content) { - return nodes; - } - if (has_class (content, class)) { - (*len)++; - nodes = g_realloc (nodes, (sizeof *nodes) * *len); - nodes[(*len)-1] = xmlCopyNode(node, XML_COPY_NODE_RECURSIVE); - } - g_free (content); - return nodes; -} - char * match_1 (char *re_str, char *subject) { pcre2_code *re; diff --git a/src/backend/readmng.c b/src/backend/readmng.c index c7e3247..f8c8e8f 100644 --- a/src/backend/readmng.c +++ b/src/backend/readmng.c @@ -1,6 +1,8 @@ #include #include +#include +#include #include #include @@ -15,6 +17,7 @@ struct _MgBackendReadmng { char *base_url; size_t main_page_html_len; char *main_page_html; + MgUtilXML *xml_utils; GListStore *(*get_featured_manga) (); }; @@ -67,6 +70,7 @@ mg_backend_readmng_init (MgBackendReadmng *self) { if (!self->base_url) { self->base_url = "https://www.readmng.com/"; } + self->xml_utils = mg_util_xml_new (); } char * @@ -137,8 +141,9 @@ mg_backend_readmng_fetch_xml_main_page (MgBackendReadmng *self) { static const char * mg_backend_readmng_get_main_page (MgBackendReadmng *self, size_t *len) { if (!self->main_page_html) { - self->main_page_html = get_request (self->base_url, - &self->main_page_html_len); + MgUtilSoup *util_soup = mg_util_soup_new (); + self->main_page_html = mg_util_soup_get_request (util_soup, + self->base_url, &self->main_page_html_len); } if (len) { *len = self->main_page_html_len; @@ -155,7 +160,6 @@ mg_backend_readmng_parse_main_page (MgBackendReadmng *self, const xmlDocPtr html size_t li_len = 0; li = mg_backend_readmng_retrieve_li_slides (self, slides, &li_len); - print_debug_nodes (html_document, li, li_len); for (int i = 0; ixml_utils; + xpath_result = mg_util_xml_get_nodes_xpath_expression (xml_utils, + html_document, "//div[@class]"); xmlNodePtr slides = NULL; xmlNodeSetPtr node_set = NULL; size_t matching_classes_len = 0; @@ -204,7 +209,8 @@ mg_backend_readmng_retrieve_slides (MgBackendReadmng *self, const xmlDocPtr html } for (int i = 0; i < node_set->nodeNr; i++) { xmlNodePtr node = node_set->nodeTab[i]; - nodes = loop_search_class (node, nodes, "slides", &matching_classes_len); + nodes = mg_util_xml_loop_search_class (xml_utils, node, nodes, + "slides", &matching_classes_len); } if (nodes) { slides = nodes[0]; @@ -218,7 +224,8 @@ mg_backend_readmng_retrieve_slides (MgBackendReadmng *self, const xmlDocPtr html static xmlNodePtr mg_backend_readmng_retrieve_thumbnail_from_li (MgBackendReadmng *self, xmlNodePtr current_li) { size_t thumbnail_len = 0; - xmlNodePtr *thumbnail = find_class (current_li, "thumbnail", + MgUtilXML *xml_utils = self->xml_utils; + xmlNodePtr *thumbnail = mg_util_xml_find_class (xml_utils, current_li, "thumbnail", &thumbnail_len, NULL, 1); if (thumbnail_len) return thumbnail[0]; return NULL; @@ -227,7 +234,8 @@ mg_backend_readmng_retrieve_thumbnail_from_li (MgBackendReadmng *self, xmlNodePt static xmlNodePtr mg_backend_readmng_retrieve_title_from_li (MgBackendReadmng *self, xmlNodePtr li) { size_t title_len = 0; - xmlNodePtr *title = find_class (li, "title", &title_len, NULL, 1); + MgUtilXML *xml_utils = self->xml_utils; + xmlNodePtr *title = mg_util_xml_find_class (xml_utils, li, "title", &title_len, NULL, 1); if (title_len) return title[0]; return NULL; } @@ -246,7 +254,8 @@ mg_backend_readmng_find_a_link_chapter (MgBackendReadmng *self, static char * mg_backend_get_id_manga_link (MgBackendReadmng *self, xmlNodePtr a) { char *re_str = "readmng\\.com/([^/]+)"; - return match_1 (re_str, get_attr (a, "href")); + MgUtilXML *xml_utils = self->xml_utils; + return match_1 (re_str, mg_util_xml_get_attr (xml_utils, a, "href")); } static void @@ -257,13 +266,15 @@ mg_backend_readmng_extract_manga_info_from_current_li (MgBackendReadmng *self, xmlNodePtr title = mg_backend_readmng_retrieve_title_from_li (self, current_li); xmlNodePtr a = mg_backend_readmng_find_a_link_chapter (self, current_li); xmlNodePtr img; + MgUtilXML *xml_utils = self->xml_utils; char *id_manga = NULL; if (thumbnail && title && (img = mg_backend_readmng_retrieve_img_from_thumbnail (self, thumbnail)) && a && (id_manga = mg_backend_get_id_manga_link (self, a))) { g_list_store_append (mangas, - mg_manga_new (get_attr (img, "src"), (char *)xmlNodeGetContent (title), id_manga)); + mg_manga_new (mg_util_xml_get_attr (xml_utils, img, "src"), + (char *)xmlNodeGetContent (title), id_manga)); } } diff --git a/src/manga.c b/src/manga.c index 943d7c3..a67bff4 100644 --- a/src/manga.c +++ b/src/manga.c @@ -143,7 +143,7 @@ mg_manga_get_property (GObject *object, MgManga * mg_manga_new (const char *const image_url, const char *const title, const char *id) { MgManga *self = NULL; - self = (MG_MANGA) (g_object_new (MG_TYPE_MANGA, NULL)); + self = MG_MANGA ((g_object_new (MG_TYPE_MANGA, NULL))); self->image_url = alloc_string (strlen (image_url)); self->title = alloc_string (strlen (title)); self->id = alloc_string (strlen (id)); diff --git a/src/util/soup.c b/src/util/soup.c new file mode 100644 index 0000000..94965c4 --- /dev/null +++ b/src/util/soup.c @@ -0,0 +1,67 @@ +#include + +#include + +struct _MgUtilSoup { + GObject parent_instance; +}; + +G_DEFINE_TYPE (MgUtilSoup, mg_util_soup, G_TYPE_OBJECT) + +MgUtilSoup * +mg_util_soup_new () { + MgUtilSoup *self = NULL; + self = MG_UTIL_SOUP (g_object_new (MG_TYPE_UTIL_SOUP, NULL)); + return self; +} + +static char * +mg_util_soup_copy_binary_data (MgUtilSoup *self, const char *input, size_t size); +static void +mg_util_soup_class_init (MgUtilSoupClass *class) { +} +static void +mg_util_soup_init (MgUtilSoup *self) { +} +char * +mg_util_soup_get_request (MgUtilSoup *self, const char *url, gsize *size_response_text) { + SoupSession *soup_session; + SoupMessage *msg; + GValue response = G_VALUE_INIT; + guint status; + + *size_response_text = 0; + + g_value_init (&response, G_TYPE_BYTES); + + soup_session = soup_session_new (); + msg = soup_message_new ("GET", url); + status = soup_session_send_message (soup_session, msg); + g_object_get_property( + G_OBJECT (msg), + "response-body-data", + &response); + + const char *html_response = g_bytes_get_data ((GBytes *) + g_value_peek_pointer (&response), + size_response_text); + + char *return_value = mg_util_soup_copy_binary_data(self, html_response, *size_response_text); + + g_value_unset (&response); + g_object_unref (soup_session); + g_object_unref (msg); + + return return_value; +} +static char * +mg_util_soup_copy_binary_data (MgUtilSoup *self, const char *input, size_t size) { + char *response = NULL; + if (size) { + response = g_realloc(response, sizeof *response * size); + for (size_t i = 0; i + +#include +#include + +#include + +struct _MgUtilXML { + GObject parent_instance; +}; + +G_DEFINE_TYPE (MgUtilXML, mg_util_xml, G_TYPE_OBJECT) + + +static void +mg_util_xml_class_init (MgUtilXMLClass *class) { +} +static void +mg_util_xml_init (MgUtilXML *self) { +} + +MgUtilXML * +mg_util_xml_new () { + MgUtilXML *self = NULL; + self = MG_UTIL_XML ((g_object_new (MG_TYPE_UTIL_XML, NULL))); + return self; +} + +xmlNodePtr * +mg_util_xml_find_class (MgUtilXML *self, xmlNodePtr node, char *class, + size_t *len, xmlNodePtr *nodes, int return_on_first) { + for (xmlNodePtr child = node->children; child; child=child->next) { + char *attr = mg_util_xml_get_attr (self, child, "class"); + if (attr && mg_util_xml_has_class (self, attr, class)) { + (*len)++; + nodes = g_realloc (nodes, sizeof *nodes * *len); + nodes[*len-1] = child; + if (return_on_first) { + return nodes; + } + } + if (node->children) { + xmlNodePtr child = node->children; + for (;child;child=child->next) { + nodes = mg_util_xml_find_class (self, child, class, + len, nodes, return_on_first); + if (*len) { + return nodes; + } + } + } + } + return nodes; +} + +char * +mg_util_xml_get_attr (MgUtilXML *self, xmlNodePtr const node, const char *attr_name) { + char *return_value = NULL; + if (!node) { + return NULL; + } + for (xmlAttr *attr = node->properties; attr; attr=attr->next) { + if (!xmlStrcmp(attr->name, (const xmlChar *) attr_name) + && attr->children && attr->children->content) { + if (!attr->children->content) continue; + size_t content_len = strlen((char *) + attr->children->content); + return_value = alloc_string(content_len); + copy_substring ((char *) attr->children->content, return_value, + content_len, + 0, + content_len); + break; + } + } + return return_value; +} + +void +mg_util_xml_print_debug_nodes (MgUtilXML *self, + const xmlDocPtr html_document, xmlNodePtr *nodes, + size_t nodes_len) { + xmlBufferPtr buffer = xmlBufferCreate (); + for (int i = 0; i < nodes_len; i++) { + xmlNodeDump (buffer, html_document, nodes[i], + 0, 1); + } + xmlBufferDump (stdout, buffer); + xmlBufferFree (buffer); +} + +int +mg_util_xml_has_class (MgUtilXML *self, + const char *class_attribute, const char *class_to_check) { + char *re = "\\s+"; + struct SplittedString *classes; + int return_value = 0; + classes = split (re, strlen(re), class_attribute, + strlen (class_attribute)); + for (int i = 0; in_strings; i++) { + if (strcmp (classes->substrings[i].content, class_to_check) == 0) { + return_value = 1; + goto cleanup_has_class; + } + } + +cleanup_has_class: + splitted_string_free (classes); + return return_value; +} + +xmlNodePtr * +mg_util_xml_loop_search_class (MgUtilXML *self, const xmlNodePtr node, xmlNodePtr *nodes, + const char * class, size_t *len) { + char *content = mg_util_xml_get_attr (self, node, "class"); + if (!content) { + return nodes; + } + if (mg_util_xml_has_class (self, content, class)) { + (*len)++; + nodes = g_realloc (nodes, (sizeof *nodes) * *len); + nodes[(*len)-1] = xmlCopyNode(node, XML_COPY_NODE_RECURSIVE); + } + g_free (content); + return nodes; +} + +xmlXPathObjectPtr +mg_util_xml_get_nodes_xpath_expression (MgUtilXML *self, + const xmlDocPtr document, char *xpath) { + xmlXPathContextPtr context; + xmlXPathObjectPtr result; + + context = xmlXPathNewContext (document); + result = xmlXPathEvalExpression ((const xmlChar *)xpath, context); + + xmlXPathFreeContext (context); + + return result; +} diff --git a/src/view/list_view_manga.c b/src/view/list_view_manga.c index 75a84f7..1be882a 100644 --- a/src/view/list_view_manga.c +++ b/src/view/list_view_manga.c @@ -1,6 +1,8 @@ #include #include +#include + #include #include @@ -28,7 +30,9 @@ setup_list_view_mangas (GtkSignalListItemFactory *factory, size_t size_downloaded_image = 0; char *downloaded_image; - downloaded_image = get_request (mg_manga_get_image_url(manga), &size_downloaded_image); + MgUtilSoup *util_soup = mg_util_soup_new (); + downloaded_image = mg_util_soup_get_request (util_soup, mg_manga_get_image_url(manga), + &size_downloaded_image); tmp_image = g_file_new_tmp ("mangareadertmpfileXXXXXX", &iostream, &error