/* Forked from sergiotarxz/mangareader. */
#include <glib-object.h>
|
|
|
|
#include <pango/pango.h>
|
|
|
|
#include <libxml/HTMLparser.h>
|
|
#include <libxml/xpath.h>
|
|
|
|
#include <openmg/util/string.h>
|
|
#include <openmg/util/regex.h>
|
|
#include <openmg/util/xml.h>
|
|
|
|
struct _MgUtilXML {
|
|
GObject parent_instance;
|
|
};
|
|
|
|
/* Define the MgUtilXML type with G_TYPE_OBJECT as its parent type. */
G_DEFINE_TYPE (MgUtilXML, mg_util_xml, G_TYPE_OBJECT)
|
|
|
|
|
|
static void
|
|
mg_util_xml_class_init (MgUtilXMLClass *class) {
|
|
}
|
|
static void
|
|
mg_util_xml_init (MgUtilXML *self) {
|
|
}
|
|
static char *
|
|
mg_util_xml_get_as_char_node (MgUtilXML *self,
|
|
xmlNodePtr node, xmlDocPtr document);
|
|
|
|
MgUtilXML *
|
|
mg_util_xml_new () {
|
|
MgUtilXML *self = NULL;
|
|
self = MG_UTIL_XML ((g_object_new (MG_TYPE_UTIL_XML, NULL)));
|
|
return self;
|
|
}
|
|
|
|
xmlNodePtr *
|
|
mg_util_xml_find_class (MgUtilXML *self, xmlNodePtr node, char *class,
|
|
size_t *len, xmlNodePtr *nodes, int return_on_first) {
|
|
for (xmlNodePtr child = node->children; child; child=child->next) {
|
|
char *attr = mg_util_xml_get_attr (self, child, "class");
|
|
if (attr && mg_util_xml_has_class (self, attr, class)) {
|
|
(*len)++;
|
|
nodes = g_realloc (nodes, sizeof *nodes * *len);
|
|
nodes[*len-1] = child;
|
|
if (return_on_first) {
|
|
return nodes;
|
|
}
|
|
}
|
|
if (node->children) {
|
|
xmlNodePtr child = node->children;
|
|
for (;child;child=child->next) {
|
|
nodes = mg_util_xml_find_class (self, child, class,
|
|
len, nodes, return_on_first);
|
|
if (*len) {
|
|
return nodes;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nodes;
|
|
}
|
|
|
|
char *
|
|
mg_util_xml_get_attr (MgUtilXML *self, xmlNodePtr const node, const char *attr_name) {
|
|
char *return_value = NULL;
|
|
MgUtilString *string_util = mg_util_string_new ();
|
|
if (!node) {
|
|
return NULL;
|
|
}
|
|
for (xmlAttr *attr = node->properties; attr; attr=attr->next) {
|
|
if (!xmlStrcmp(attr->name, (const xmlChar *) attr_name)
|
|
&& attr->children && attr->children->content) {
|
|
if (!attr->children->content) continue;
|
|
size_t content_len = strlen((char *)
|
|
attr->children->content);
|
|
return_value = mg_util_string_alloc_string (string_util, content_len);
|
|
mg_util_string_copy_substring (string_util, (char *) attr->children->content,
|
|
return_value, content_len, 0, content_len);
|
|
break;
|
|
}
|
|
}
|
|
return return_value;
|
|
}
|
|
|
|
void
|
|
mg_util_xml_print_debug_nodes (MgUtilXML *self,
|
|
const xmlDocPtr html_document, xmlNodePtr *nodes,
|
|
size_t nodes_len) {
|
|
xmlBufferPtr buffer = xmlBufferCreate ();
|
|
for (int i = 0; i < nodes_len; i++) {
|
|
xmlNodeDump (buffer, html_document, nodes[i],
|
|
0, 1);
|
|
}
|
|
xmlBufferDump (stdout, buffer);
|
|
xmlBufferFree (buffer);
|
|
}
|
|
|
|
int
|
|
mg_util_xml_has_class (MgUtilXML *self,
|
|
const char *class_attribute, const char *class_to_check) {
|
|
char *re = "\\s+";
|
|
struct SplittedString *classes;
|
|
MgUtilRegex *regex_util = mg_util_regex_new ();
|
|
int return_value = 0;
|
|
classes = mg_util_regex_split (regex_util, re, strlen(re), class_attribute,
|
|
strlen (class_attribute));
|
|
for (int i = 0; i<classes->n_strings; i++) {
|
|
if (strcmp (classes->substrings[i].content, class_to_check) == 0) {
|
|
return_value = 1;
|
|
goto cleanup_has_class;
|
|
}
|
|
}
|
|
|
|
cleanup_has_class:
|
|
mg_util_regex_splitted_string_free (regex_util, classes);
|
|
return return_value;
|
|
}
|
|
|
|
xmlNodePtr *
|
|
mg_util_xml_loop_search_class (MgUtilXML *self, const xmlNodePtr node, xmlNodePtr *nodes,
|
|
const char * class, size_t *len) {
|
|
char *content = mg_util_xml_get_attr (self, node, "class");
|
|
if (!content) {
|
|
return nodes;
|
|
}
|
|
if (mg_util_xml_has_class (self, content, class)) {
|
|
(*len)++;
|
|
nodes = g_realloc (nodes, (sizeof *nodes) * *len);
|
|
nodes[(*len)-1] = xmlCopyNode(node, XML_COPY_NODE_RECURSIVE);
|
|
}
|
|
g_free (content);
|
|
return nodes;
|
|
}
|
|
|
|
xmlXPathObjectPtr
|
|
mg_util_xml_get_nodes_xpath_expression (MgUtilXML *self,
|
|
const xmlDocPtr document, char *xpath) {
|
|
xmlXPathContextPtr context;
|
|
xmlXPathObjectPtr result;
|
|
|
|
context = xmlXPathNewContext (document);
|
|
result = xmlXPathEvalExpression ((const xmlChar *)xpath, context);
|
|
|
|
xmlXPathFreeContext (context);
|
|
|
|
return result;
|
|
}
|
|
|
|
char *
|
|
mg_util_xml_get_title_text (MgUtilXML *self,
|
|
const char *const text) {
|
|
xmlDocPtr document = xmlNewDoc ((xmlChar *) "1.0");
|
|
xmlNodePtr root_node = xmlNewNode (NULL, (xmlChar *) "span");
|
|
xmlNodePtr text_content = NULL;
|
|
xmlDocSetRootElement (document, root_node);
|
|
char *size_text = NULL;
|
|
size_text = g_malloc (sizeof *size_text * 2000);
|
|
|
|
text_content = xmlNewText ((xmlChar *) text);
|
|
xmlAddChild (root_node, text_content);
|
|
snprintf (size_text, 2000, "%d", 30 * PANGO_SCALE);
|
|
xmlNewProp (root_node, (xmlChar *) "size", (xmlChar *) size_text);
|
|
|
|
return mg_util_xml_get_as_char_node (self, root_node, document);
|
|
}
|
|
|
|
static char *
|
|
mg_util_xml_get_as_char_node (MgUtilXML *self,
|
|
xmlNodePtr node, xmlDocPtr document) {
|
|
xmlBufferPtr buffer = xmlBufferCreate ();
|
|
const char *buffer_contents;
|
|
char *return_value = NULL;
|
|
size_t buffer_len;
|
|
MgUtilString *string_util = NULL;
|
|
xmlNodeDump (buffer, document, node, 0, 1);
|
|
|
|
buffer_contents = (char *) xmlBufferContent (buffer);
|
|
buffer_len = strlen (buffer_contents);
|
|
return_value = mg_util_string_alloc_string (string_util, buffer_len);
|
|
mg_util_string_copy_substring (string_util, buffer_contents,
|
|
return_value, buffer_len, 0, buffer_len);
|
|
|
|
xmlBufferFree (buffer);
|
|
return return_value;
|
|
}
|