forked from sergiotarxz/mangareader
Making some fixes and searching for divs.
Next step is pcre2 integration.
This commit is contained in:
parent
9591489cd9
commit
595eda2215
92
mangafox.c
92
mangafox.c
@ -15,40 +15,16 @@ xmlXPathObjectPtr
|
|||||||
get_nodes_xpath_expression (
|
get_nodes_xpath_expression (
|
||||||
const xmlDocPtr document,
|
const xmlDocPtr document,
|
||||||
char *xpath);
|
char *xpath);
|
||||||
|
char *
|
||||||
|
get_request (const char *url, gsize *size_response_text);
|
||||||
void
|
void
|
||||||
retrieve_mangafox_title () {
|
retrieve_mangafox_title () {
|
||||||
SoupSession
|
|
||||||
*soup_session;
|
|
||||||
SoupMessage *msg;
|
|
||||||
GValue response = G_VALUE_INIT;
|
|
||||||
guint status;
|
|
||||||
gsize size_response_text;
|
|
||||||
xmlDocPtr html_response;
|
xmlDocPtr html_response;
|
||||||
|
gsize *size_response_text = malloc (sizeof (gsize));
|
||||||
g_value_init (&response, G_TYPE_BYTES);
|
char *response_text = get_request (mangafox_url,
|
||||||
|
size_response_text);
|
||||||
soup_session =
|
|
||||||
soup_session_new();
|
|
||||||
msg =
|
|
||||||
soup_message_new(
|
|
||||||
"GET",
|
|
||||||
mangafox_url
|
|
||||||
);
|
|
||||||
status =
|
|
||||||
soup_session_send_message (soup_session, msg);
|
|
||||||
g_object_get_property(
|
|
||||||
G_OBJECT (msg),
|
|
||||||
"response-body-data",
|
|
||||||
&response);
|
|
||||||
const char *response_text =
|
|
||||||
g_bytes_get_data (
|
|
||||||
(GBytes *)
|
|
||||||
g_value_peek_pointer
|
|
||||||
(&response),
|
|
||||||
&size_response_text
|
|
||||||
);
|
|
||||||
html_response = htmlReadMemory (response_text,
|
html_response = htmlReadMemory (response_text,
|
||||||
size_response_text,
|
*size_response_text,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
HTML_PARSE_RECOVER | HTML_PARSE_NODEFDTD
|
HTML_PARSE_RECOVER | HTML_PARSE_NODEFDTD
|
||||||
@ -56,29 +32,67 @@ retrieve_mangafox_title() {
|
|||||||
);
|
);
|
||||||
size_t manga_size;
|
size_t manga_size;
|
||||||
parse_main_mangafox_page (html_response, &manga_size);
|
parse_main_mangafox_page (html_response, &manga_size);
|
||||||
|
free (response_text);
|
||||||
|
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
get_request (const char *url, gsize *size_response_text) {
|
||||||
|
SoupSession *soup_session;
|
||||||
|
SoupMessage *msg;
|
||||||
|
GValue response = G_VALUE_INIT;
|
||||||
|
guint status;
|
||||||
|
|
||||||
|
*size_response_text = 0;
|
||||||
|
|
||||||
|
g_value_init (&response, G_TYPE_BYTES);
|
||||||
|
|
||||||
|
soup_session = soup_session_new ();
|
||||||
|
msg = soup_message_new ("GET", url);
|
||||||
|
status = soup_session_send_message (soup_session, msg);
|
||||||
|
g_object_get_property(
|
||||||
|
G_OBJECT (msg),
|
||||||
|
"response-body-data",
|
||||||
|
&response);
|
||||||
|
|
||||||
printf("%u\n", status);
|
printf("%u\n", status);
|
||||||
|
const char *html_response = g_bytes_get_data ((GBytes *)
|
||||||
|
g_value_peek_pointer (&response),
|
||||||
|
size_response_text);
|
||||||
|
|
||||||
|
char *return_value = g_strndup (html_response, *size_response_text);
|
||||||
|
|
||||||
|
g_value_unset (&response);
|
||||||
|
g_object_unref (soup_session);
|
||||||
|
g_object_unref (msg);
|
||||||
|
|
||||||
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
struct Manga *
|
struct Manga *
|
||||||
parse_main_mangafox_page (
|
parse_main_mangafox_page (const xmlDocPtr html_document,
|
||||||
const xmlDocPtr html_document,
|
|
||||||
const size_t *size) {
|
const size_t *size) {
|
||||||
xmlIndentTreeOutput = 1;
|
xmlIndentTreeOutput = 1;
|
||||||
// xmlDocDump (stderr, html_document);
|
|
||||||
xmlXPathObjectPtr xpath_result = get_nodes_xpath_expression (html_document,
|
xmlXPathObjectPtr xpath_result = get_nodes_xpath_expression (html_document,
|
||||||
"//a");
|
"//div[@class]");
|
||||||
if (!xpath_result) {
|
if (!xpath_result) {
|
||||||
fprintf(stderr, "Empty xpath result\n");
|
fprintf(stderr, "Empty xpath result\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
xmlNodeSetPtr node_set = xpath_result->nodesetval;
|
xmlNodeSetPtr node_set = xpath_result->nodesetval;
|
||||||
printf("%d\n", node_set->nodeNr);
|
if (!node_set) {
|
||||||
|
fprintf(stderr, "No match\n");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
for (int i = 0; i < node_set->nodeNr; i++) {
|
for (int i = 0; i < node_set->nodeNr; i++) {
|
||||||
xmlNodePtr node = node_set->nodeTab[i];
|
xmlNodePtr node = node_set->nodeTab[i];
|
||||||
for (xmlAttr *attrs = node->properties; attrs->next; attrs=attrs->next) {
|
for (xmlAttr *attrs = node->properties; attrs; attrs=attrs->next) {
|
||||||
if (!xmlStrcmp(attrs->name, (const xmlChar *)"href")) {
|
if (!xmlStrcmp(attrs->name, (const xmlChar *)"class")) {
|
||||||
|
if (attrs->children
|
||||||
|
&& attrs->children->content) {
|
||||||
printf("%s\n", (const char *)attrs->children->content);
|
printf("%s\n", (const char *)attrs->children->content);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -86,9 +100,7 @@ parse_main_mangafox_page (
|
|||||||
}
|
}
|
||||||
|
|
||||||
xmlXPathObjectPtr
|
xmlXPathObjectPtr
|
||||||
get_nodes_xpath_expression (
|
get_nodes_xpath_expression (const xmlDocPtr document, char *xpath) {
|
||||||
const xmlDocPtr document,
|
|
||||||
char *xpath) {
|
|
||||||
xmlXPathContextPtr context;
|
xmlXPathContextPtr context;
|
||||||
xmlXPathObjectPtr result;
|
xmlXPathObjectPtr result;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user