Splitting manga.c contents into xml.c and soup.c.

String manipulation still needs to be refactored, and
manga.c and manga.h still need to be deleted (the root
ones; MgManga is fine).
This commit is contained in:
sergiotarxz 2021-10-30 18:59:54 +02:00
parent 269b85b0d0
commit 923282f1d1
11 changed files with 296 additions and 206 deletions

View File

@ -6,4 +6,4 @@ LDFLAGS := $(shell pkg-config --libs ${LIBS})
CC_COMMAND := ${CC} ${INCDIR} ${CFLAGS}
all: build
build:
${CC_COMMAND} src/view/list_view_manga.c src/view/main_view.c src/manga.c src/backend/readmng.c manga.c main.c -o main ${LDFLAGS} -ggdb
${CC_COMMAND} src/util/xml.c src/util/soup.c src/view/list_view_manga.c src/view/main_view.c src/manga.c src/backend/readmng.c manga.c main.c -o main ${LDFLAGS} -ggdb

View File

@ -7,8 +7,6 @@
#include <pcre2.h>
#endif
#define XML_COPY_NODE_RECURSIVE 2 | 1
struct Manga {
char *title;
char *image_url;
@ -24,38 +22,14 @@ struct String {
size_t size;
};
char *
get_request (const char *url, gsize *size_response_text);
xmlNodePtr *
find_class (xmlNodePtr node, char *class, size_t *len, xmlNodePtr *nodes,
int return_on_first);
void
print_debug_nodes (const xmlDocPtr html_document,
xmlNodePtr *nodes, size_t nodes_len);
char *
get_attr (xmlNodePtr const node, const char *attr_name);
void
copy_substring(const char *origin, char *dest, size_t dest_len, size_t start,
size_t len);
int
has_class (const char *class_attribute,
const char *class_to_check);
struct SplittedString *
split(char *re_str, size_t re_str_size, const char *subject, size_t subject_size);
char *
alloc_string(size_t len);
void
splitted_string_free (struct SplittedString *splitted_string);
void
iterate_string_to_split(struct SplittedString *splitted_string,
pcre2_code *re, int *will_break, const char *subject,
size_t subject_size, size_t *start_pos, size_t *offset);
xmlXPathObjectPtr
get_nodes_xpath_expression (const xmlDocPtr document, char *xpath);
xmlNodePtr *
loop_search_class (const xmlNodePtr node, xmlNodePtr *nodes,
const char * class, size_t *len);
char *
copy_binary_data (const char *input, size_t size);
char *
match_1 (char *re_str, char *subject);
void
copy_substring(const char *origin, char *dest, size_t dest_len, size_t start,
size_t len);

View File

@ -3,15 +3,9 @@
G_BEGIN_DECLS;
/*
* Type declaration
*/
#define MG_TYPE_MANGA mg_manga_get_type()
G_DECLARE_FINAL_TYPE (MgManga, mg_manga, MG, MANGA, GObject)
/*
* Method definitions.
*/
char *mg_manga_get_image_url(MgManga *mg_manga);
char *mg_manga_get_title(MgManga *mg_manga);
char *mg_manga_get_id(MgManga *mg_manga);

View File

@ -0,0 +1,15 @@
#pragma once
#include <glib-object.h>
G_BEGIN_DECLS
/* GType accessor for MgUtilSoup. */
#define MG_TYPE_UTIL_SOUP mg_util_soup_get_type()
/* Declares MgUtilSoup as a final type deriving from GObject. */
G_DECLARE_FINAL_TYPE (MgUtilSoup, mg_util_soup, MG, UTIL_SOUP, GObject)
/* Creates a new MgUtilSoup instance. */
MgUtilSoup *
mg_util_soup_new ();
/*
 * Issues an HTTP GET for `url` and returns a copy of the response body.
 * The number of bytes in the body is written to *size_response_text.
 * Returns NULL when the body is empty.
 */
char *
mg_util_soup_get_request (MgUtilSoup *self, const char *const url, gsize *size_response_text);
G_END_DECLS

37
include/openmg/util/xml.h Normal file
View File

@ -0,0 +1,37 @@
#pragma once
#include <glib-object.h>
#include <libxml/HTMLparser.h>
#include <libxml/xpath.h>
#include <manga.h> //Nasty file
G_BEGIN_DECLS;
#define XML_COPY_NODE_RECURSIVE 2 | 1
#define MG_TYPE_UTIL_XML mg_util_xml_get_type()
G_DECLARE_FINAL_TYPE (MgUtilXML, mg_util_xml, MG, UTIL_XML, GObject)
MgUtilXML *
mg_util_xml_new ();;
char *
mg_util_xml_get_attr (MgUtilXML *self, xmlNodePtr const node, const char *attr_name);
xmlNodePtr *
mg_util_xml_find_class (MgUtilXML *self, xmlNodePtr node, char *class,
size_t *len, xmlNodePtr *nodes, int return_on_first);
xmlNodePtr *
mg_util_xml_loop_search_class (MgUtilXML *self, const xmlNodePtr node, xmlNodePtr *nodes,
const char * class, size_t *len);
xmlXPathObjectPtr
mg_util_xml_get_nodes_xpath_expression (MgUtilXML *self,
const xmlDocPtr document, char *xpath);
int
mg_util_xml_has_class (MgUtilXML *self,
const char *class_attribute, const char *class_to_check);
G_END_DECLS

164
manga.c
View File

@ -1,6 +1,4 @@
#include <libsoup/soup.h>
#include <libxml/HTMLparser.h>
#include <libxml/xpath.h>
#ifndef PCRE2_CODE_UNIT_WIDTH
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
@ -8,111 +6,11 @@
#include <manga.h>
char *
get_request (const char *url, gsize *size_response_text) {
SoupSession *soup_session;
SoupMessage *msg;
GValue response = G_VALUE_INIT;
guint status;
*size_response_text = 0;
g_value_init (&response, G_TYPE_BYTES);
soup_session = soup_session_new ();
msg = soup_message_new ("GET", url);
status = soup_session_send_message (soup_session, msg);
g_object_get_property(
G_OBJECT (msg),
"response-body-data",
&response);
const char *html_response = g_bytes_get_data ((GBytes *)
g_value_peek_pointer (&response),
size_response_text);
char *return_value = copy_binary_data(html_response, *size_response_text);
g_value_unset (&response);
g_object_unref (soup_session);
g_object_unref (msg);
return return_value;
}
char *
copy_binary_data (const char *input, size_t size) {
char *response = NULL;
if (size) {
response = g_realloc(response, sizeof *response * size);
for (size_t i = 0; i<size; i++) {
response[i] = input[i];
}
}
return response;
}
xmlNodePtr *
find_class (xmlNodePtr node, char *class, size_t *len, xmlNodePtr *nodes,
int return_on_first) {
for (xmlNodePtr child = node->children; child; child=child->next) {
char *attr = get_attr (child, "class");
if (attr && has_class (attr, class)) {
(*len)++;
nodes = g_realloc (nodes, sizeof *nodes * *len);
nodes[*len-1] = child;
if (return_on_first) {
return nodes;
}
}
if (node->children) {
xmlNodePtr child = node->children;
for (;child;child=child->next) {
nodes = find_class (child, class, len, nodes,
return_on_first);
if (*len) {
return nodes;
}
}
}
}
return nodes;
}
void
print_debug_nodes (const xmlDocPtr html_document,
xmlNodePtr *nodes, size_t nodes_len) {
xmlBufferPtr buffer = xmlBufferCreate ();
for (int i = 0; i < nodes_len; i++) {
xmlNodeDump (buffer, html_document, nodes[i],
0, 1);
}
xmlBufferDump (stdout, buffer);
xmlBufferFree (buffer);
}
char *
get_attr (xmlNodePtr const node, const char *attr_name) {
char *return_value = NULL;
if (!node) {
return NULL;
}
for (xmlAttr *attr = node->properties; attr; attr=attr->next) {
if (!xmlStrcmp(attr->name, (const xmlChar *) attr_name)
&& attr->children && attr->children->content) {
if (!attr->children->content) continue;
size_t content_len = strlen((char *)
attr->children->content);
return_value = alloc_string(content_len);
copy_substring ((char *) attr->children->content, return_value,
content_len,
0,
content_len);
break;
}
}
return return_value;
}
// TODO: Split this file and delete it.
static void
iterate_string_to_split(struct SplittedString *splitted_string,
pcre2_code *re, int *will_break, const char *subject,
size_t subject_size, size_t *start_pos, size_t *offset);
void
copy_substring(const char *origin, char *dest, size_t dest_len, size_t start,
@ -128,27 +26,6 @@ copy_substring(const char *origin, char *dest, size_t dest_len, size_t start,
}
dest[len] = '\0';
}
int
has_class (const char *class_attribute,
const char *class_to_check) {
char *re = "\\s+";
struct SplittedString *classes;
int return_value = 0;
classes = split(re, strlen(re), class_attribute,
strlen(class_attribute));
for (int i = 0; i<classes->n_strings; i++) {
if (strcmp(classes->substrings[i].content, class_to_check) == 0) {
return_value = 1;
goto cleanup_has_class;
}
}
cleanup_has_class:
splitted_string_free (classes);
return return_value;
}
struct SplittedString *
split(char *re_str, size_t re_str_size, const char *subject, size_t subject_size) {
pcre2_code_8 *re;
@ -195,7 +72,7 @@ splitted_string_free (struct SplittedString *splitted_string) {
g_free (splitted_string);
}
void
static void
iterate_string_to_split(struct SplittedString *splitted_string, pcre2_code *re, int *will_break, const char *subject,
size_t subject_size, size_t *start_pos, size_t *offset) {
pcre2_match_data_8 *match_data;
@ -245,35 +122,6 @@ cleanup_iterate_string_to_split:
pcre2_match_data_free (match_data);
}
xmlXPathObjectPtr
get_nodes_xpath_expression (const xmlDocPtr document, char *xpath) {
xmlXPathContextPtr context;
xmlXPathObjectPtr result;
context = xmlXPathNewContext (document);
result = xmlXPathEvalExpression ((const xmlChar *)xpath, context);
xmlXPathFreeContext (context);
return result;
}
xmlNodePtr *
loop_search_class (const xmlNodePtr node, xmlNodePtr *nodes,
const char * class, size_t *len) {
char *content = get_attr (node, "class");
if (!content) {
return nodes;
}
if (has_class (content, class)) {
(*len)++;
nodes = g_realloc (nodes, (sizeof *nodes) * *len);
nodes[(*len)-1] = xmlCopyNode(node, XML_COPY_NODE_RECURSIVE);
}
g_free (content);
return nodes;
}
char *
match_1 (char *re_str, char *subject) {
pcre2_code *re;

View File

@ -1,6 +1,8 @@
#include <libxml/HTMLparser.h>
#include <openmg/backend/readmng.h>
#include <openmg/util/soup.h>
#include <openmg/util/xml.h>
#include <openmg/manga.h>
#include <manga.h>
@ -15,6 +17,7 @@ struct _MgBackendReadmng {
char *base_url;
size_t main_page_html_len;
char *main_page_html;
MgUtilXML *xml_utils;
GListStore *(*get_featured_manga) ();
};
@ -67,6 +70,7 @@ mg_backend_readmng_init (MgBackendReadmng *self) {
if (!self->base_url) {
self->base_url = "https://www.readmng.com/";
}
self->xml_utils = mg_util_xml_new ();
}
char *
@ -137,8 +141,9 @@ mg_backend_readmng_fetch_xml_main_page (MgBackendReadmng *self) {
static const char *
mg_backend_readmng_get_main_page (MgBackendReadmng *self, size_t *len) {
if (!self->main_page_html) {
self->main_page_html = get_request (self->base_url,
&self->main_page_html_len);
MgUtilSoup *util_soup = mg_util_soup_new ();
self->main_page_html = mg_util_soup_get_request (util_soup,
self->base_url, &self->main_page_html_len);
}
if (len) {
*len = self->main_page_html_len;
@ -155,7 +160,6 @@ mg_backend_readmng_parse_main_page (MgBackendReadmng *self, const xmlDocPtr html
size_t li_len = 0;
li = mg_backend_readmng_retrieve_li_slides (self, slides, &li_len);
print_debug_nodes (html_document, li, li_len);
for (int i = 0; i<li_len; i++) {
xmlNodePtr current_li = li[i];
mg_backend_readmng_extract_manga_info_from_current_li (self,
@ -191,8 +195,9 @@ static xmlNodePtr
mg_backend_readmng_retrieve_slides (MgBackendReadmng *self, const xmlDocPtr html_document) {
xmlNodePtr *nodes = NULL;
xmlXPathObjectPtr xpath_result = NULL;
xpath_result = get_nodes_xpath_expression (html_document,
"//div[@class]");
MgUtilXML *xml_utils = self->xml_utils;
xpath_result = mg_util_xml_get_nodes_xpath_expression (xml_utils,
html_document, "//div[@class]");
xmlNodePtr slides = NULL;
xmlNodeSetPtr node_set = NULL;
size_t matching_classes_len = 0;
@ -204,7 +209,8 @@ mg_backend_readmng_retrieve_slides (MgBackendReadmng *self, const xmlDocPtr html
}
for (int i = 0; i < node_set->nodeNr; i++) {
xmlNodePtr node = node_set->nodeTab[i];
nodes = loop_search_class (node, nodes, "slides", &matching_classes_len);
nodes = mg_util_xml_loop_search_class (xml_utils, node, nodes,
"slides", &matching_classes_len);
}
if (nodes) {
slides = nodes[0];
@ -218,7 +224,8 @@ mg_backend_readmng_retrieve_slides (MgBackendReadmng *self, const xmlDocPtr html
static xmlNodePtr
mg_backend_readmng_retrieve_thumbnail_from_li (MgBackendReadmng *self, xmlNodePtr current_li) {
size_t thumbnail_len = 0;
xmlNodePtr *thumbnail = find_class (current_li, "thumbnail",
MgUtilXML *xml_utils = self->xml_utils;
xmlNodePtr *thumbnail = mg_util_xml_find_class (xml_utils, current_li, "thumbnail",
&thumbnail_len, NULL, 1);
if (thumbnail_len) return thumbnail[0];
return NULL;
@ -227,7 +234,8 @@ mg_backend_readmng_retrieve_thumbnail_from_li (MgBackendReadmng *self, xmlNodePt
static xmlNodePtr
mg_backend_readmng_retrieve_title_from_li (MgBackendReadmng *self, xmlNodePtr li) {
size_t title_len = 0;
xmlNodePtr *title = find_class (li, "title", &title_len, NULL, 1);
MgUtilXML *xml_utils = self->xml_utils;
xmlNodePtr *title = mg_util_xml_find_class (xml_utils, li, "title", &title_len, NULL, 1);
if (title_len) return title[0];
return NULL;
}
@ -246,7 +254,8 @@ mg_backend_readmng_find_a_link_chapter (MgBackendReadmng *self,
static char *
mg_backend_get_id_manga_link (MgBackendReadmng *self, xmlNodePtr a) {
char *re_str = "readmng\\.com/([^/]+)";
return match_1 (re_str, get_attr (a, "href"));
MgUtilXML *xml_utils = self->xml_utils;
return match_1 (re_str, mg_util_xml_get_attr (xml_utils, a, "href"));
}
static void
@ -257,13 +266,15 @@ mg_backend_readmng_extract_manga_info_from_current_li (MgBackendReadmng *self,
xmlNodePtr title = mg_backend_readmng_retrieve_title_from_li (self, current_li);
xmlNodePtr a = mg_backend_readmng_find_a_link_chapter (self, current_li);
xmlNodePtr img;
MgUtilXML *xml_utils = self->xml_utils;
char *id_manga = NULL;
if (thumbnail && title && (img = mg_backend_readmng_retrieve_img_from_thumbnail (self, thumbnail))
&& a && (id_manga = mg_backend_get_id_manga_link (self, a))) {
g_list_store_append (mangas,
mg_manga_new (get_attr (img, "src"), (char *)xmlNodeGetContent (title), id_manga));
mg_manga_new (mg_util_xml_get_attr (xml_utils, img, "src"),
(char *)xmlNodeGetContent (title), id_manga));
}
}

View File

@ -143,7 +143,7 @@ mg_manga_get_property (GObject *object,
MgManga *
mg_manga_new (const char *const image_url, const char *const title, const char *id) {
MgManga *self = NULL;
self = (MG_MANGA) (g_object_new (MG_TYPE_MANGA, NULL));
self = MG_MANGA ((g_object_new (MG_TYPE_MANGA, NULL)));
self->image_url = alloc_string (strlen (image_url));
self->title = alloc_string (strlen (title));
self->id = alloc_string (strlen (id));

67
src/util/soup.c Normal file
View File

@ -0,0 +1,67 @@
#include <libsoup/soup.h>
#include <openmg/util/soup.h>
struct _MgUtilSoup {
GObject parent_instance;
};
G_DEFINE_TYPE (MgUtilSoup, mg_util_soup, G_TYPE_OBJECT)
/*
 * Builds an MgUtilSoup through g_object_new, passing no construction
 * properties, and hands it back already cast to the concrete type.
 */
MgUtilSoup *
mg_util_soup_new () {
    return MG_UTIL_SOUP (g_object_new (MG_TYPE_UTIL_SOUP, NULL));
}
/* Forward declaration: the helper is defined after mg_util_soup_get_request, which calls it. */
static char *
mg_util_soup_copy_binary_data (MgUtilSoup *self, const char *input, size_t size);
/* Class initializer required by G_DEFINE_TYPE; nothing is set up here. */
static void
mg_util_soup_class_init (MgUtilSoupClass *class) {
}
/* Instance initializer required by G_DEFINE_TYPE; the struct has no fields of its own to initialize. */
static void
mg_util_soup_init (MgUtilSoup *self) {
}
/*
 * Performs a blocking HTTP GET on `url` and returns a copy of the body.
 *
 * self:               instance, forwarded to the copy helper.
 * url:                address to request.
 * size_response_text: out parameter (must not be NULL); receives the number
 *                     of bytes in the returned buffer, or 0 when NULL is
 *                     returned.
 *
 * Returns a buffer obtained from g_realloc holding exactly
 * *size_response_text bytes (no terminator is appended), or NULL when the
 * message could not be created from `url`, no body is available, or the
 * body is empty.
 */
char *
mg_util_soup_get_request (MgUtilSoup *self, const char *url, gsize *size_response_text) {
    SoupSession *soup_session = NULL;
    SoupMessage *msg = NULL;
    GValue response = G_VALUE_INIT;
    GBytes *body = NULL;
    char *return_value = NULL;

    *size_response_text = 0;

    /* soup_message_new yields NULL when the URL cannot be parsed. */
    msg = soup_message_new ("GET", url);
    if (!msg) {
        return NULL;
    }

    soup_session = soup_session_new ();
    soup_session_send_message (soup_session, msg);

    g_value_init (&response, G_TYPE_BYTES);
    g_object_get_property (G_OBJECT (msg), "response-body-data", &response);

    /* The GBytes stays owned by `response`; copy out of it before unsetting. */
    body = (GBytes *) g_value_peek_pointer (&response);
    if (body) {
        const char *html_response = g_bytes_get_data (body, size_response_text);
        return_value = mg_util_soup_copy_binary_data (self, html_response,
                *size_response_text);
    }

    g_value_unset (&response);
    g_object_unref (soup_session);
    g_object_unref (msg);
    return return_value;
}
/*
 * Duplicates `size` raw bytes starting at `input` into a buffer obtained
 * from g_realloc.  Nothing is allocated for a zero size: NULL is returned.
 * No terminator is appended to the copy.
 */
static char *
mg_util_soup_copy_binary_data (MgUtilSoup *self, const char *input, size_t size) {
    if (!size) {
        return NULL;
    }

    char *copy = g_realloc (NULL, size * sizeof *copy);
    const char *src = input;
    char *dst = copy;
    for (size_t remaining = size; remaining; remaining--) {
        *dst++ = *src++;
    }
    return copy;
}

140
src/util/xml.c Normal file
View File

@ -0,0 +1,140 @@
#include <glib-object.h>
#include <libxml/HTMLparser.h>
#include <libxml/xpath.h>
#include <openmg/util/xml.h>
struct _MgUtilXML {
GObject parent_instance;
};
G_DEFINE_TYPE (MgUtilXML, mg_util_xml, G_TYPE_OBJECT)
/* Class initializer required by G_DEFINE_TYPE; nothing is set up here. */
static void
mg_util_xml_class_init (MgUtilXMLClass *class) {
}
/* Instance initializer required by G_DEFINE_TYPE; the struct has no fields of its own to initialize. */
static void
mg_util_xml_init (MgUtilXML *self) {
}
/*
 * Builds an MgUtilXML through g_object_new, passing no construction
 * properties, and hands it back already cast to the concrete type.
 */
MgUtilXML *
mg_util_xml_new () {
    return MG_UTIL_XML ((g_object_new (MG_TYPE_UTIL_XML, NULL)));
}
/*
 * Walks the descendants of `node` in document order (each child first, then
 * that child's own subtree) collecting nodes whose class attribute contains
 * `class`.
 *
 * self:            instance, forwarded to the attribute/class helpers.
 * node:            root of the search; the root itself is not tested.
 *                  A NULL root yields `nodes` unchanged.
 * class:           class name to look for.
 * len:             in/out count of entries stored in `nodes`.
 * nodes:           array to append to (may be NULL); grown with g_realloc.
 * return_on_first: when non-zero, stop as soon as one new match is stored;
 *                  when zero, collect every match.
 *
 * Returns the (possibly reallocated) array.  Stored pointers refer to nodes
 * of the searched tree; they are not copies.
 */
xmlNodePtr *
mg_util_xml_find_class (MgUtilXML *self, xmlNodePtr node, char *class,
        size_t *len, xmlNodePtr *nodes, int return_on_first) {
    if (!node) {
        return nodes;
    }

    /* Lets us detect a match made by a recursive call even if *len was
     * already non-zero when we were entered. */
    const size_t initial_len = *len;

    for (xmlNodePtr child = node->children; child; child = child->next) {
        char *attr = mg_util_xml_get_attr (self, child, "class");
        int matches = attr && mg_util_xml_has_class (self, attr, class);
        /* The attribute value is an allocated copy; release it here. */
        g_free (attr);

        if (matches) {
            (*len)++;
            nodes = g_realloc (nodes, sizeof *nodes * *len);
            nodes[*len - 1] = child;
            if (return_on_first) {
                return nodes;
            }
        }

        /* Descend into this child only; siblings are handled by the loop. */
        nodes = mg_util_xml_find_class (self, child, class, len, nodes,
                return_on_first);
        if (return_on_first && *len > initial_len) {
            return nodes;
        }
    }
    return nodes;
}
/*
 * Looks up attribute `attr_name` on `node` and returns a copy of its value.
 *
 * The copy is produced with alloc_string/copy_substring; callers in this
 * file release it with g_free.  Returns NULL when `node` is NULL or when no
 * attribute with that name has a content-bearing child.
 */
char *
mg_util_xml_get_attr (MgUtilXML *self, xmlNodePtr const node, const char *attr_name) {
    if (!node) {
        return NULL;
    }

    for (xmlAttr *attr = node->properties; attr; attr = attr->next) {
        if (xmlStrcmp (attr->name, (const xmlChar *) attr_name) != 0) {
            continue;
        }
        /* A matching name without a value is skipped; keep scanning. */
        if (!attr->children || !attr->children->content) {
            continue;
        }

        const char *content = (const char *) attr->children->content;
        size_t content_len = strlen (content);
        char *return_value = alloc_string (content_len);
        copy_substring (content, return_value, content_len, 0, content_len);
        return return_value;
    }
    return NULL;
}
/*
 * Debug helper: serializes each of the `nodes_len` entries of `nodes` into
 * one libxml2 buffer and writes that buffer to stdout.
 *
 * Does nothing when the buffer cannot be created.
 */
void
mg_util_xml_print_debug_nodes (MgUtilXML *self,
        const xmlDocPtr html_document, xmlNodePtr *nodes,
        size_t nodes_len) {
    xmlBufferPtr buffer = xmlBufferCreate ();
    if (!buffer) {
        return;
    }

    /* size_t index: nodes_len is unsigned, avoid a signed/unsigned compare. */
    for (size_t i = 0; i < nodes_len; i++) {
        xmlNodeDump (buffer, html_document, nodes[i], 0, 1);
    }
    xmlBufferDump (stdout, buffer);
    xmlBufferFree (buffer);
}
/*
 * Tells whether `class_to_check` is one of the entries of `class_attribute`.
 *
 * The attribute text is split on the regular expression "\s+" and every
 * piece is compared with strcmp.  Returns 1 on an exact match, 0 otherwise.
 */
int
mg_util_xml_has_class (MgUtilXML *self,
        const char *class_attribute, const char *class_to_check) {
    char *separator_re = "\\s+";
    struct SplittedString *tokens = split (separator_re, strlen (separator_re),
            class_attribute, strlen (class_attribute));
    int found = 0;

    /* Stop scanning as soon as one token matches. */
    for (int i = 0; !found && i < tokens->n_strings; i++) {
        found = strcmp (tokens->substrings[i].content, class_to_check) == 0;
    }

    splitted_string_free (tokens);
    return found;
}
/*
 * Tests a single node: when its class attribute contains `class`, a copy of
 * the node made with xmlCopyNode (XML_COPY_NODE_RECURSIVE) is appended to
 * `nodes` and *len is incremented.
 *
 * Returns `nodes`, possibly moved by g_realloc.  Nodes without a class
 * attribute leave both `nodes` and *len untouched.
 */
xmlNodePtr *
mg_util_xml_loop_search_class (MgUtilXML *self, const xmlNodePtr node, xmlNodePtr *nodes,
        const char * class, size_t *len) {
    char *class_attr = mg_util_xml_get_attr (self, node, "class");

    if (class_attr) {
        if (mg_util_xml_has_class (self, class_attr, class)) {
            size_t slot = (*len)++;
            nodes = g_realloc (nodes, *len * (sizeof *nodes));
            nodes[slot] = xmlCopyNode (node, XML_COPY_NODE_RECURSIVE);
        }
        g_free (class_attr);
    }
    return nodes;
}
/*
 * Evaluates `xpath` against `document` using a temporary XPath context that
 * is freed before returning.  The evaluation result is returned as-is.
 */
xmlXPathObjectPtr
mg_util_xml_get_nodes_xpath_expression (MgUtilXML *self,
        const xmlDocPtr document, char *xpath) {
    xmlXPathContextPtr xpath_ctx = xmlXPathNewContext (document);
    xmlXPathObjectPtr matches =
        xmlXPathEvalExpression ((const xmlChar *) xpath, xpath_ctx);

    xmlXPathFreeContext (xpath_ctx);
    return matches;
}

View File

@ -1,6 +1,8 @@
#include <gtk/gtk.h>
#include <openmg/manga.h>
#include <openmg/util/soup.h>
#include <openmg/view/list_view_manga.h>
#include <manga.h>
@ -28,7 +30,9 @@ setup_list_view_mangas (GtkSignalListItemFactory *factory,
size_t size_downloaded_image = 0;
char *downloaded_image;
downloaded_image = get_request (mg_manga_get_image_url(manga), &size_downloaded_image);
MgUtilSoup *util_soup = mg_util_soup_new ();
downloaded_image = mg_util_soup_get_request (util_soup, mg_manga_get_image_url(manga),
&size_downloaded_image);
tmp_image = g_file_new_tmp ("mangareadertmpfileXXXXXX",
&iostream,
&error