Don't load entire wxr import file at once to conserve memory. Props tellyworth. fixes #5357

git-svn-id: https://develop.svn.wordpress.org/trunk@6336 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Ryan Boren 2007-11-15 05:36:43 +00:00
parent 46dbf8b1c9
commit e5e7b6d2b4
1 changed file with 93 additions and 84 deletions

View File

@ -3,13 +3,15 @@
class WP_Import { class WP_Import {
var $posts = array (); var $posts = array ();
var $posts_processed = array (); var $post_ids_processed = array ();
// Array of arrays. [[0] => XML fragment, [1] => New post ID] // Array of arrays. [[0] => XML fragment, [1] => New post ID]
var $file; var $file;
var $id; var $id;
var $mtnames = array (); var $mtnames = array ();
var $newauthornames = array (); var $newauthornames = array ();
var $allauthornames = array ();
var $j = -1; var $j = -1;
var $another_pass = false;
function header() { function header() {
echo '<div class="wrap">'; echo '<div class="wrap">';
@ -83,69 +85,65 @@ class WP_Import {
return $user_id; return $user_id;
} }
function get_entries() { function get_entries($process_post_func=NULL) {
set_magic_quotes_runtime(0); set_magic_quotes_runtime(0);
$this->posts = array(); # $this->posts = array();
$this->categories = array(); # $this->categories = array();
$this->tags = array(); # $this->tags = array();
$num = 0; # $num = 0;
$doing_entry = false;
$fp = fopen($this->file, 'r'); for ($i=0; $i<2; $i++) {
if ($fp) { $this->another_pass = false;
while ( !feof($fp) ) { $doing_entry = false;
$importline = rtrim(fgets($fp));
$fp = fopen($this->file, 'r');
if ( false !== strpos($importline, '<wp:category>') ) { if ($fp) {
preg_match('|<wp:category>(.*?)</wp:category>|is', $importline, $category); while ( !feof($fp) ) {
$this->categories[] = $category[1]; $importline = rtrim(fgets($fp));
continue;
} if ( false !== strpos($importline, '<wp:category>') ) {
if ( false !== strpos($importline, '<wp:tag>') ) { preg_match('|<wp:category>(.*?)</wp:category>|is', $importline, $category);
preg_match('|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag); $this->categories[] = $category[1];
$this->tags[] = $tag[1]; continue;
continue; }
} if ( false !== strpos($importline, '<wp:tag>') ) {
if ( false !== strpos($importline, '<item>') ) { preg_match('|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag);
$this->posts[$num] = ''; $this->tags[] = $tag[1];
$doing_entry = true; continue;
continue; }
} if ( false !== strpos($importline, '<item>') ) {
if ( false !== strpos($importline, '</item>') ) { $this->post = '';
$num++; $doing_entry = true;
$doing_entry = false; continue;
continue; }
} if ( false !== strpos($importline, '</item>') ) {
if ( $doing_entry ) { $num++;
$this->posts[$num] .= $importline . "\n"; $doing_entry = false;
if ($process_post_func)
call_user_func($process_post_func, $this->post);
continue;
}
if ( $doing_entry ) {
$this->post .= $importline . "\n";
}
} }
fclose($fp);
} }
foreach ($this->posts as $post) { // skip the second loop iteration unless it's needed
$post_ID = (int) $this->get_tag( $post, 'wp:post_id' ); if ( !$this->another_pass )
if ($post_ID) { break;
$this->posts_processed[$post_ID][0] = &$post;
$this->posts_processed[$post_ID][1] = 0;
}
}
fclose($fp);
} }
} }
function get_wp_authors() { function get_wp_authors() {
$temp = array (); $this->get_entries(array(&$this, 'process_author'));
$i = -1;
foreach ($this->posts as $post) {
if ('' != trim($post)) {
++ $i;
$author = $this->get_tag( $post, 'dc:creator' );
array_push($temp, "$author"); //store the extracted author names in a temporary array
}
}
// We need to find unique values of author names, while preserving the order, so this function emulates the unique_value(); php function, without the sorting. // We need to find unique values of author names, while preserving the order, so this function emulates the unique_value(); php function, without the sorting.
$temp = $this->allauthornames;
$authors[0] = array_shift($temp); $authors[0] = array_shift($temp);
$y = count($temp) + 1; $y = count($temp) + 1;
for ($x = 1; $x < $y; $x ++) { for ($x = 1; $x < $y; $x ++) {
@ -181,6 +179,7 @@ class WP_Import {
array_push($this->newauthornames, "$formnames[$i]"); array_push($this->newauthornames, "$formnames[$i]");
} }
} }
} }
function wp_authors_form() { function wp_authors_form() {
@ -210,16 +209,7 @@ class WP_Import {
} }
function select_authors() { function select_authors() {
$file = wp_import_handle_upload(); $this->get_entries(array(&$this, 'process_author'));
if ( isset($file['error']) ) {
echo '<p>'.__('Sorry, there has been an error.').'</p>';
echo '<p><strong>' . $file['error'] . '</strong></p>';
return;
}
$this->file = $file['file'];
$this->id = (int) $file['id'];
$this->get_entries();
$this->wp_authors_form(); $this->wp_authors_form();
} }
@ -273,15 +263,18 @@ class WP_Import {
} }
} }
/**
 * Per-item callback for get_entries(): remembers the author of one item.
 *
 * Pulls the dc:creator tag out of the given fragment and, when the value is
 * truthy, appends it to $this->allauthornames. No de-duplication happens
 * here; the same name may be appended more than once.
 *
 * @param string $post Text collected between one <item> and </item> pair.
 */
function process_author($post) {
	$creator = $this->get_tag( $post, 'dc:creator' );

	// Nothing usable in this fragment; leave the list unchanged.
	if ( !$creator )
		return;

	$this->allauthornames[] = $creator;
}
function process_posts() { function process_posts() {
return; //FIXME
$i = -1; $i = -1;
echo '<ol>'; echo '<ol>';
foreach ($this->posts as $post) { $this->get_entries(array(&$this, 'process_post'));
$result = $this->process_post($post);
if ( is_wp_error( $result ) )
return $result;
}
echo '</ol>'; echo '</ol>';
@ -292,9 +285,9 @@ class WP_Import {
function process_post($post) { function process_post($post) {
global $wpdb; global $wpdb;
$post_ID = (int) $this->get_tag( $post, 'wp:post_id' ); $post_ID = (int) $this->get_tag( $post, 'wp:post_id' );
if ( $post_ID && !empty($this->posts_processed[$post_ID][1]) ) // Processed already if ( $post_ID && !empty($this->post_ids_processed[$post_ID]) ) // Processed already
return 0; return 0;
// There are only ever one of these // There are only ever one of these
@ -341,13 +334,15 @@ class WP_Import {
// If it has parent, process parent first. // If it has parent, process parent first.
$post_parent = (int) $post_parent; $post_parent = (int) $post_parent;
if ($parent = $this->posts_processed[$post_parent]) { if ($post_parent) {
if (!$parent[1]) { if ( $parent = $this->post_ids_processed[$post_parent] ) {
$result = $this->process_post($parent[0]); // If not yet, process the parent first. $post_parent = $parent; // new ID of the parent
if ( is_wp_error( $result ) ) }
return $result; else {
// wait until the parent has been processed
$this->another_pass = true;
return;
} }
$post_parent = $parent[1]; // New ID of the parent;
} }
echo '<li>'; echo '<li>';
@ -361,8 +356,9 @@ class WP_Import {
return $post_id; return $post_id;
// Memorize old and new ID. // Memorize old and new ID.
if ( $post_id && $post_ID && $this->posts_processed[$post_ID] ) if ( $post_id && $post_ID ) {
$this->posts_processed[$post_ID][1] = $post_id; // New ID. $this->post_ids_processed[intval($post_ID)] = intval($post_id);
}
// Add categories. // Add categories.
if (count($categories) > 0) { if (count($categories) > 0) {
@ -439,18 +435,30 @@ class WP_Import {
} } } }
} }
function import() { function import($id) {
$this->id = (int) $_GET['id']; $this->id = (int) $id;
$this->file = get_attached_file($this->id); $this->file = get_attached_file($this->id);
$this->get_authors_from_post(); $this->get_authors_from_post();
$this->get_entries(); $this->get_entries(array(&$this, 'process_post'));
$this->process_categories(); $this->process_categories();
$this->process_tags(); $this->process_tags();
$result = $this->process_posts(); $result = $this->process_posts();
if ( is_wp_error( $result ) ) if ( is_wp_error( $result ) )
return $result; return $result;
} }
/**
 * Runs the import upload handler and stores the result on this object.
 *
 * On success, $this->file and $this->id are set from the handler's 'file'
 * and 'id' entries. On failure, a generic error notice followed by the
 * handler's 'error' entry is echoed.
 *
 * @return bool True when the upload succeeded, false when the handler
 *              reported an error.
 */
function handle_upload() {
	$upload = wp_import_handle_upload();

	// Success path first: no 'error' key means the handler accepted the file.
	if ( !isset($upload['error']) ) {
		$this->file = $upload['file'];
		$this->id = (int) $upload['id'];
		return true;
	}

	echo '<p>'.__('Sorry, there has been an error.').'</p>';
	echo '<p><strong>' . $upload['error'] . '</strong></p>';
	return false;
}
function dispatch() { function dispatch() {
if (empty ($_GET['step'])) if (empty ($_GET['step']))
@ -465,11 +473,12 @@ class WP_Import {
break; break;
case 1 : case 1 :
check_admin_referer('import-upload'); check_admin_referer('import-upload');
$this->select_authors(); if ( $this->handle_upload() )
$this->select_authors();
break; break;
case 2: case 2:
check_admin_referer('import-wordpress'); check_admin_referer('import-wordpress');
$result = $this->import(); $result = $this->import( $_GET['id'] );
if ( is_wp_error( $result ) ) if ( is_wp_error( $result ) )
echo $result->get_error_message(); echo $result->get_error_message();
break; break;