From eb5b631d504e99435fa90ac94d9b658a991190d2 Mon Sep 17 00:00:00 2001 From: sergiotarxz Date: Mon, 25 Oct 2021 12:36:42 +0200 Subject: [PATCH] Adding untracked file by error. --- manga.c | 272 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 manga.c diff --git a/manga.c b/manga.c new file mode 100644 index 0000000..6f43d92 --- /dev/null +++ b/manga.c @@ -0,0 +1,272 @@ +#include +#include +#include +#ifndef PCRE2_CODE_UNIT_WIDTH +#define PCRE2_CODE_UNIT_WIDTH 8 +#include +#endif + /* NOTE(review): every #include in this text is missing its header name and a few expressions further down are cut short, so the text looks damaged; compare with the original commit before applying. */ +#include + /* get_request: builds a "GET" SoupMessage for url, sends it on a new SoupSession, reads the message's "response-body-data" property into a GValue and returns what copy_binary_data produces from those bytes. *size_response_text is set to 0 up front and is then handed to g_bytes_get_data as its size argument. The GValue, the session and the message are released before returning. */ +char * +get_request (const char *url, gsize *size_response_text) { + SoupSession *soup_session; + SoupMessage *msg; + GValue response = G_VALUE_INIT; + guint status; + + *size_response_text = 0; + + g_value_init (&response, G_TYPE_BYTES); + + soup_session = soup_session_new (); + msg = soup_message_new ("GET", url); + status = soup_session_send_message (soup_session, msg); /* NOTE(review): status is never read afterwards, so the outcome of the request is not checked */ + g_object_get_property( + G_OBJECT (msg), + "response-body-data", + &response); + + const char *html_response = g_bytes_get_data ((GBytes *) + g_value_peek_pointer (&response), + size_response_text); + + char *return_value = copy_binary_data(html_response, *size_response_text); + + g_value_unset (&response); + g_object_unref (soup_session); + g_object_unref (msg); + + return return_value; +} + /* copy_binary_data: when size is non-zero, allocates size bytes through g_realloc on a NULL pointer. NOTE(review): the text breaks off inside the for loop header below; the rest of this function and the opening of the next one (presumably find_class, judging by the recursive call further down) are not visible. */ +char * +copy_binary_data (const char *input, size_t size) { + char *response = NULL; + if (size) { + response = g_realloc(response, sizeof *response * size); + for (size_t i = 0; ichildren; child; child=child->next) { /* from here on: loop over child nodes, presumably inside find_class */ + char *attr = get_attr (child, "class"); /* copy allocated by get_attr; NOTE(review): no g_free of attr is visible in this fragment */ + if (attr && has_class (attr, class)) { + (*len)++; + nodes = g_realloc (nodes, sizeof *nodes * *len); + nodes[*len-1] = child; /* stores the node itself, not a copy */ + if (return_on_first) { + return nodes; + } + } + if (node->children) { /* NOTE(review): tests node rather than child, and the inner child shadows the outer one; confirm the intended traversal */ + xmlNodePtr child = node->children; + for (;child;child=child->next) { + nodes = find_class (child, class, len, nodes, + return_on_first); + if (*len) { /* returns as soon as any match is recorded, whatever return_on_first says */ + return nodes; + } + } + } + } + return nodes; 
+} + /* print_debug_nodes: dumps each of the nodes_len entries of nodes into one xmlBuffer with xmlNodeDump, writes that buffer to stdout with xmlBufferDump and frees it. */ +void +print_debug_nodes (const xmlDocPtr html_document, + xmlNodePtr *nodes, size_t nodes_len) { + xmlBufferPtr buffer = xmlBufferCreate (); + for (int i = 0; i < nodes_len; i++) { /* NOTE(review): int index compared against a size_t count */ + xmlNodeDump (buffer, html_document, nodes[i], + 0, 1); + } + xmlBufferDump (stdout, buffer); + xmlBufferFree (buffer); +} + /* get_attr: walks node->properties and, for the first attribute whose name equals attr_name and which has a child with content, returns a copy of that content made with alloc_string and copy_substring. Returns NULL when no such attribute is found. */ +char * +get_attr (xmlNodePtr const node, const char *attr_name) { + char *return_value = NULL; + for (xmlAttr *attr = node->properties; attr; attr=attr->next) { + if (!xmlStrcmp(attr->name, (const xmlChar *) attr_name) + && attr->children && attr->children->content) { + if (!attr->children->content) continue; /* cannot trigger: the enclosing condition already requires content to be non-NULL */ + size_t content_len = strlen((char *) + attr->children->content); + return_value = alloc_string(content_len); + copy_substring ((char *) attr->children->content, return_value, + content_len, + 0, + content_len); + break; + } + } + return return_value; +} + /* copy_substring: copies len bytes of origin, starting at index start, to the beginning of dest and then writes '\0' at dest[len], so dest needs room for len + 1 bytes. The loop stops early with a message on stderr once start + copying_offset exceeds dest_len. NOTE(review): that limit is applied to the index used to read origin, and the callers in this text pass the length of the source there, so the name dest_len is misleading. */ +void +copy_substring(const char *origin, char *dest, size_t dest_len, size_t start, + size_t len) { + size_t copying_offset = 0; + while (copying_offset < len) { + if (!(start+copying_offset <=dest_len)) { + fprintf(stderr, "Read attempt out of bounds.%ld %ld %ld\n", dest_len, start, len); /* NOTE(review): the arguments are size_t but the conversions are %ld; %zu is the matching one */ + break; + } + dest[copying_offset] = origin[start+copying_offset]; + copying_offset++; + } + dest[len] = '\0'; /* also runs after an early break, leaving the bytes in between unwritten */ +} + /* has_class: splits class_attribute with split, using the pattern stored in re, and returns 1 when one of the pieces compares equal to class_to_check with strcmp, otherwise 0. The split result is released with splitted_string_free on both paths. */ +int +has_class (const char *class_attribute, + const char *class_to_check) { + char *re = "\\s+"; + struct SplittedString *classes; + int return_value = 0; + classes = split(re, strlen(re), class_attribute, + strlen(class_attribute)); + for (int i = 0; in_strings; i++) { /* NOTE(review): the loop condition is cut short in this text; presumably it compared i with classes->n_strings */ + if (strcmp(classes->substrings[i].content, class_to_check) == 0) { + return_value = 1; + goto cleanup_has_class; + } + } + +cleanup_has_class: + splitted_string_free (classes); + return return_value; +} + /* split: compiles re_str with pcre2_compile and calls iterate_string_to_split until start_pos reaches subject_size or the helper sets will_break. Returns a struct SplittedString obtained from g_malloc; has_class releases it with splitted_string_free. For subject_size 0 the loop never runs and the result keeps n_strings 0 and substrings NULL. */ +struct SplittedString * +split(char *re_str, size_t re_str_size, const char *subject, size_t subject_size) { + pcre2_code_8 *re; + size_t start_pos = 0; + size_t offset = 0; + int regex_compile_error; + PCRE2_SIZE 
error_offset; + struct SplittedString *splitted_string; + + splitted_string = g_malloc (sizeof *splitted_string); + + splitted_string->n_strings = 0; + splitted_string->substrings = NULL; + re = pcre2_compile ((PCRE2_SPTR8) re_str, + re_str_size, 0, ®ex_compile_error, &error_offset, NULL); /* NOTE(review): the fourth argument is presumably a mangled "&regex_compile_error"; the returned code pointer is used below without a NULL check */ + while (start_pos < subject_size) { + int will_break = 0; + iterate_string_to_split(splitted_string, re, &will_break, + subject, subject_size, &start_pos, &offset); + if (will_break) { + break; + } + } + + pcre2_code_free (re); + re = NULL; + + return splitted_string; +} + /* alloc_string: returns a g_malloc buffer of len + 1 bytes, enough for len characters and a terminator. */ +char * +alloc_string(size_t len) { + char * return_value = NULL; /* only used as the operand of sizeof */ + return g_malloc (len + 1 * sizeof *return_value); /* NOTE(review): parses as len + (1 * sizeof *return_value); that equals len + 1 only because the element type is char */ +} + /* splitted_string_free: frees the content of each substring, then the substrings array, then the struct itself. */ +void +splitted_string_free (struct SplittedString *splitted_string) { + for (int i = 0; in_strings; i++) { /* NOTE(review): the loop condition is cut short in this text; presumably it compared i with splitted_string->n_strings */ + g_free (splitted_string->substrings[i].content); + } + + g_free (splitted_string->substrings); + g_free (splitted_string); +} + /* iterate_string_to_split: one step of split. Increments n_strings, runs pcre2_match on subject from *start_pos and sizes substrings to hold entry *offset. If rc is negative, the rest of subject from *start_pos becomes entry *offset and *will_break is set to 1. Otherwise the text between *start_pos and ovector[0] becomes entry *offset, *start_pos moves to ovector[1] and *offset is incremented. The match data is freed on both paths. */ +void +iterate_string_to_split(struct SplittedString *splitted_string, pcre2_code *re, int *will_break, const char *subject, + size_t subject_size, size_t *start_pos, size_t *offset) { + pcre2_match_data_8 *match_data; + PCRE2_SIZE *ovector; + int rc; + + splitted_string->n_strings++; + match_data = pcre2_match_data_create_from_pattern_8 (re, NULL); + rc = pcre2_match ( re, (PCRE2_SPTR8) subject, subject_size, *start_pos, 0, match_data, + NULL); + if (splitted_string->substrings) { + splitted_string->substrings = g_realloc (splitted_string->substrings, + (sizeof *splitted_string->substrings) * (*offset + 1)); + } else { + splitted_string->substrings = g_malloc (sizeof *splitted_string->substrings); + } + if (rc < 0) { /* every negative rc is treated as "no further separator" */ + struct String *current_substring = + &splitted_string->substrings [*offset]; + current_substring->content = alloc_string (subject_size + - *start_pos); + copy_substring (subject, current_substring->content, + subject_size, + *start_pos, + subject_size - *start_pos); + current_substring->size = subject_size - *start_pos; + + *will_break = 1; 
+ goto cleanup_iterate_string_to_split; + } + ovector = pcre2_get_ovector_pointer_8(match_data); /* ovector[0] and ovector[1] are used below as the start and end of the match */ + splitted_string->substrings[*offset].content = alloc_string ( + ovector[0] - *start_pos); + copy_substring (subject, splitted_string->substrings[*offset] + .content, + subject_size, + *start_pos, + ovector[0] - *start_pos); + splitted_string->substrings[*offset].size = + ovector[0] - *start_pos; + + *start_pos = ovector[1]; + + *offset += 1; + +cleanup_iterate_string_to_split: + pcre2_match_data_free (match_data); +} + /* get_nodes_xpath_expression: creates an XPath context for document, evaluates xpath in it, frees the context and returns the evaluation result. The result itself is not freed here. */ +xmlXPathObjectPtr +get_nodes_xpath_expression (const xmlDocPtr document, char *xpath) { + xmlXPathContextPtr context; + xmlXPathObjectPtr result; + + context = xmlXPathNewContext (document); + result = xmlXPathEvalExpression ((const xmlChar *)xpath, context); + + xmlXPathFreeContext (context); + + return result; +} + /* loop_search_class: when node has a "class" attribute for which has_class (content, class) is true, increments *len, grows nodes with g_realloc and stores xmlCopyNode (node, XML_COPY_NODE_RECURSIVE) in the new last slot. Returns nodes, which g_realloc may have moved. The attribute copy is freed unless get_attr returned NULL. */ +xmlNodePtr * +loop_search_class (const xmlNodePtr node, xmlNodePtr *nodes, + const char * class, size_t *len) { + char *content = get_attr (node, "class"); + if (!content) { + return nodes; + } + if (has_class (content, class)) { + (*len)++; + nodes = g_realloc (nodes, (sizeof *nodes) * *len); + nodes[(*len)-1] = xmlCopyNode(node, XML_COPY_NODE_RECURSIVE); + } + g_free (content); + return nodes; +}