forked from sergiotarxz/mangareader
Achieved extracting covers from manga-slide.
This commit is contained in:
parent
6dd5f63428
commit
5d49d08d1c
106
mangafox.c
106
mangafox.c
|
@ -64,6 +64,13 @@ print_debug_nodes (const xmlDocPtr html_document,
|
||||||
xmlNodePtr *
|
xmlNodePtr *
|
||||||
find_all_manga_slide(const xmlDocPtr html_document,
|
find_all_manga_slide(const xmlDocPtr html_document,
|
||||||
size_t *len);
|
size_t *len);
|
||||||
|
char *
|
||||||
|
get_attr (xmlNodePtr const node, const char *attr_name);
|
||||||
|
char *
|
||||||
|
get_manga_slide_cover(xmlNodePtr node);
|
||||||
|
char *
|
||||||
|
match_1 (char *re_str, char *subject);
|
||||||
|
|
||||||
void
|
void
|
||||||
retrieve_mangafox_title () {
|
retrieve_mangafox_title () {
|
||||||
xmlDocPtr html_response;
|
xmlDocPtr html_response;
|
||||||
|
@ -121,10 +128,34 @@ struct Manga *
|
||||||
parse_main_mangafox_page (const xmlDocPtr html_document,
|
parse_main_mangafox_page (const xmlDocPtr html_document,
|
||||||
const size_t *size) {
|
const size_t *size) {
|
||||||
xmlNodePtr *nodes;
|
xmlNodePtr *nodes;
|
||||||
|
xmlNodePtr node;
|
||||||
size_t nodes_len = 0;
|
size_t nodes_len = 0;
|
||||||
|
|
||||||
nodes = find_all_manga_slide (html_document, &nodes_len);
|
nodes = find_all_manga_slide (html_document, &nodes_len);
|
||||||
print_debug_nodes (html_document, nodes, nodes_len);
|
//print_debug_nodes (html_document, nodes, nodes_len);
|
||||||
|
for (int i = 0; i < nodes_len; i++) {
|
||||||
|
node = nodes[i];
|
||||||
|
char *cover = get_manga_slide_cover(node);
|
||||||
|
if (cover) {
|
||||||
|
printf("%s\n", cover);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
get_manga_slide_cover(xmlNodePtr node) {
|
||||||
|
for (xmlNodePtr child = node->children; child; child=child->next) {
|
||||||
|
char *attr = get_attr (child, "class");
|
||||||
|
if (attr && has_class (attr, "m-slide-background")) {
|
||||||
|
char *style = get_attr (child, "style");
|
||||||
|
char *match = match_1 ("background-image:url\\((.*?)\\)", style);
|
||||||
|
if (match) {
|
||||||
|
printf("%s\n", match);
|
||||||
|
return match;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -171,20 +202,39 @@ cleanup_find_all_manga_slide:
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
xmlNodePtr *
|
char *
|
||||||
loop_search_class(const xmlNodePtr node, xmlNodePtr *nodes,
|
get_attr (xmlNodePtr const node, const char *attr_name) {
|
||||||
const char * class, size_t *len) {
|
char *return_value = NULL;
|
||||||
for (xmlAttr *attr = node->properties; attr; attr=attr->next) {
|
for (xmlAttr *attr = node->properties; attr; attr=attr->next) {
|
||||||
if (!xmlStrcmp(attr->name, (const xmlChar *)"class")
|
if (!xmlStrcmp(attr->name, (const xmlChar *) attr_name)
|
||||||
&& attr->children && attr->children->content) {
|
&& attr->children && attr->children->content) {
|
||||||
const char *content = (char *) attr->children->content;
|
if (!attr->children->content) continue;
|
||||||
if (has_class (content, class)) {
|
size_t content_len = strlen((char *)
|
||||||
(*len)++;
|
attr->children->content);
|
||||||
nodes = g_realloc (nodes, (sizeof *nodes) * *len);
|
return_value = alloc_string(content_len);
|
||||||
nodes[(*len)-1] = xmlCopyNode(node, XML_COPY_NODE_RECURSIVE);
|
copy_substring ((char *) attr->children->content, return_value,
|
||||||
}
|
content_len,
|
||||||
|
0,
|
||||||
|
content_len);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Appends a copy of node to the nodes array when the node's class
 * attribute contains class.
 *
 * node:  element to test.
 * nodes: current result array (grown with g_realloc when a match is found).
 * class: class name passed to has_class ().
 * len:   in/out element count of nodes; incremented on a match.
 *
 * Returns the (possibly reallocated) array; callers must use the returned
 * pointer in place of the one they passed in.
 */
xmlNodePtr *
loop_search_class (const xmlNodePtr node, xmlNodePtr *nodes,
        const char * class, size_t *len) {
    char *class_attr = get_attr (node, "class");

    if (class_attr) {
        if (has_class (class_attr, class)) {
            size_t slot = *len;

            *len = slot + 1;
            nodes = g_realloc (nodes, (sizeof *nodes) * *len);
            nodes[slot] = xmlCopyNode (node, XML_COPY_NODE_RECURSIVE);
        }
        /* The attribute copy is only needed for the class test. */
        g_free (class_attr);
    }
    return nodes;
}
|
||||||
|
|
||||||
|
@ -248,6 +298,36 @@ split(char *re_str, size_t re_str_size, const char *subject, size_t subject_size
|
||||||
return splitted_string;
|
return splitted_string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
match_1 (char *re_str, char *subject) {
|
||||||
|
pcre2_code *re;
|
||||||
|
pcre2_match_data *match_data;
|
||||||
|
|
||||||
|
char *return_value;
|
||||||
|
int regex_compile_error;
|
||||||
|
int rc;
|
||||||
|
size_t len_match = 0;
|
||||||
|
|
||||||
|
return_value = NULL;
|
||||||
|
PCRE2_SIZE error_offset;
|
||||||
|
|
||||||
|
re = pcre2_compile ((PCRE2_SPTR8) re_str, strlen (re_str), 0,
|
||||||
|
®ex_compile_error, &error_offset, NULL);
|
||||||
|
match_data = pcre2_match_data_create_from_pattern (re, NULL);
|
||||||
|
rc = pcre2_match (re, (PCRE2_SPTR8) subject, strlen (subject),
|
||||||
|
0, 0, match_data, NULL);
|
||||||
|
if (rc < 0 ) {
|
||||||
|
goto cleanup_match;
|
||||||
|
}
|
||||||
|
|
||||||
|
pcre2_substring_get_bynumber (match_data, 1, (PCRE2_UCHAR8**)
|
||||||
|
&return_value, &len_match);
|
||||||
|
cleanup_match:
|
||||||
|
pcre2_match_data_free (match_data);
|
||||||
|
pcre2_code_free (re);
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
iterate_string_to_split(struct SplittedString *splitted_string, pcre2_code *re, int *will_break, const char *subject,
|
iterate_string_to_split(struct SplittedString *splitted_string, pcre2_code *re, int *will_break, const char *subject,
|
||||||
size_t subject_size, size_t *start_pos, size_t *offset) {
|
size_t subject_size, size_t *start_pos, size_t *offset) {
|
||||||
|
@ -257,7 +337,7 @@ iterate_string_to_split(struct SplittedString *splitted_string, pcre2_code *re,
|
||||||
|
|
||||||
splitted_string->n_strings++;
|
splitted_string->n_strings++;
|
||||||
match_data = pcre2_match_data_create_from_pattern_8 (re, NULL);
|
match_data = pcre2_match_data_create_from_pattern_8 (re, NULL);
|
||||||
rc = pcre2_match_8 ( re, (PCRE2_SPTR8) subject, subject_size, *start_pos, 0, match_data,
|
rc = pcre2_match ( re, (PCRE2_SPTR8) subject, subject_size, *start_pos, 0, match_data,
|
||||||
NULL);
|
NULL);
|
||||||
if (splitted_string->substrings) {
|
if (splitted_string->substrings) {
|
||||||
splitted_string->substrings = g_realloc (splitted_string->substrings,
|
splitted_string->substrings = g_realloc (splitted_string->substrings,
|
||||||
|
@ -300,7 +380,7 @@ cleanup_iterate_string_to_split:
|
||||||
|
|
||||||
/*
 * Allocates an uninitialised buffer large enough for a string of len
 * characters plus one extra element for the terminator.
 *
 * len: number of characters the caller intends to store.
 *
 * Returns memory obtained from g_malloc (); release it with g_free ().
 * The buffer contents are not initialised.
 */
char *
alloc_string(size_t len) {
    /* Parenthesised so the element size scales the whole count, not just
     * the "+ 1". */
    char *return_value = g_malloc ((len + 1) * sizeof *return_value);

    return return_value;
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue