Skip to content
Merged
106 changes: 56 additions & 50 deletions packages/playground/data-liberation/plugin.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

/**
* Don't run KSES on the attribute values during the import.
*
*
* Without this filter, WP_HTML_Tag_Processor::set_attribute() will
* assume the value is a URL and run KSES on it, which will incorrectly
* prefix relative paths with http://.
*
*
* For example:
*
*
* > $html = new WP_HTML_Tag_Processor( '<img>' );
* > $html->next_tag();
* > $html->set_attribute( 'src', './_assets/log-errors.png' );
Expand All @@ -25,6 +25,41 @@
return [];
});

/**
* Development debug code to run the import manually.
* @TODO: Remove this in favor of a CLI command.
*/
add_action('init', function() {
return;
$wxr_path = __DIR__ . '/tests/fixtures/wxr-simple.xml';
$importer = WP_Stream_Importer::create_for_wxr_file(
$wxr_path
);
while($importer->next_step()) {
// ...
}
return;
$importer->next_step();
$paused_importer_state = $importer->get_reentrancy_cursor();

echo "\n\n";
echo "moving to importer2\n";
echo "\n\n";

$importer2 = WP_Stream_Importer::create_for_wxr_file(
$wxr_path,
array(),
$paused_importer_state
);
$importer2->next_step();
$importer2->next_step();
$importer2->next_step();
// $importer2->next_step();
// var_dump($importer2);

die("YAY");
});

// Register admin menu
add_action('admin_menu', function() {
add_menu_page(
Expand Down Expand Up @@ -86,7 +121,7 @@ function data_liberation_admin_page() {
data_liberation_process_import();
echo '</pre>';
}

?>
<h2>Active import</h2>
<?php
Expand Down Expand Up @@ -148,9 +183,9 @@ function data_liberation_admin_page() {
>
<?php wp_nonce_field('data_liberation_import'); ?>
<input type="hidden" name="action" value="data_liberation_import">

<h2>Import Content</h2>

<table class="form-table">
<tr>
<th scope="row">Import Type</th>
Expand All @@ -175,7 +210,7 @@ function data_liberation_admin_page() {
</label>
</td>
</tr>

<tr data-wp-context='{ "importType": "wxr_file" }'
data-wp-class--hidden="!state.isImportTypeSelected">
<th scope="row">WXR File</th>
Expand All @@ -184,7 +219,7 @@ function data_liberation_admin_page() {
<p class="description">Upload a WordPress eXtended RSS (WXR) file</p>
</td>
</tr>

<tr data-wp-context='{ "importType": "wxr_url" }'
data-wp-class--hidden="!state.isImportTypeSelected">
<th scope="row">WXR URL</th>
Expand All @@ -193,7 +228,7 @@ function data_liberation_admin_page() {
<p class="description">Enter the URL of a WXR file</p>
</td>
</tr>

<tr data-wp-context='{ "importType": "markdown_zip" }'
data-wp-class--hidden="!state.isImportTypeSelected">
<th scope="row">Markdown ZIP</th>
Expand All @@ -210,7 +245,7 @@ function data_liberation_admin_page() {
<h2>Previous Imports</h2>

<p>TODO: Show a table of previous imports.</p>

<table class="form-table">
<tr>
<th scope="row">Date</th>
Expand Down Expand Up @@ -329,7 +364,7 @@ function data_liberation_admin_page() {
*/
// if(is_wp_error(wp_schedule_event(time(), 'data_liberation_minute', 'data_liberation_process_import'))) {
// wp_delete_attachment($attachment_id, true);
// // @TODO: More user friendly error message – maybe redirect back to the import screen and
// // @TODO: More user friendly error message – maybe redirect back to the import screen and
// // show the error there.
// wp_die('Failed to schedule import – the "data_liberation_minute" schedule may not be registered.');
// }
Expand All @@ -353,20 +388,9 @@ function data_liberation_process_import() {

function data_liberation_import_step($import) {
$importer = data_liberation_create_importer($import);
// @TODO: Save the last importer state so we can resume it later if interrupted.
update_option('data_liberation_import_progress', [
'status' => 'Downloading static assets...',
'current' => 0,
'total' => 0
]);
$importer->frontload_assets();
// @TODO: Keep track of multiple progress dimensions – posts, assets, categories, etc.
update_option('data_liberation_import_progress', [
'status' => 'Importing posts...',
'current' => 0,
'total' => 0
]);
$importer->import_entities();
while($importer->next_step()) {
// ...Twiddle our thumbs...
}
delete_option('data_liberation_active_import');
// @TODO: Do not echo things. Append to an import log where we can retrace the steps.
// Also, store specific import events in the database so the user can react and
Expand All @@ -382,25 +406,13 @@ function data_liberation_create_importer($import) {
// @TODO: Save the error, report it to the user.
return;
}
$entity_iterator_factory = function() use ($wxr_path) {
$wxr = new WP_WXR_Reader();
$wxr->connect_upstream(new WP_File_Reader($wxr_path));

return $wxr;
};
return WP_Stream_Importer::create(
$entity_iterator_factory
return WP_Stream_Importer::create_for_wxr_file(
$wxr_path
);

case 'wxr_url':
$wxr_url = $import['wxr_url'];
$entity_iterator_factory = function() use ($wxr_url) {
$wxr = new WP_WXR_Reader();
$wxr->connect_upstream(new WP_Remote_File_Reader($wxr_url));
return $wxr;
};
return WP_Stream_Importer::create(
$entity_iterator_factory
return WP_Stream_Importer::create_for_wxr_url(
$import['wxr_url']
);

case 'markdown_zip':
Expand All @@ -419,18 +431,12 @@ function data_liberation_create_importer($import) {
}
}
$markdown_root = $temp_dir;
$entity_iterator_factory = function() use ($markdown_root) {
return new WP_Markdown_Directory_Tree_Reader(
$markdown_root,
1000
);
};
return WP_Markdown_Importer::create(
$entity_iterator_factory, [
return WP_Markdown_Importer::create_for_markdown_directory(
$markdown_root, [
'source_site_url' => 'file://' . $markdown_root,
'local_markdown_assets_root' => $markdown_root,
'local_markdown_assets_url_prefix' => '@site/',
]
);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<?php

interface WP_Byte_Reader {
public function pause(): array|bool;
public function resume( $paused_state ): bool;
public function tell(): int;
public function seek( int $offset ): bool;
public function is_finished(): bool;
public function next_bytes(): bool;
public function get_bytes(): string|null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ class WP_File_Reader implements WP_Byte_Reader {
protected $chunk_size;
protected $file_pointer;
protected $offset_in_file;
protected $output_bytes;
protected $output_bytes = '';
protected $last_chunk_size = 0;
protected $last_error;
protected $state = self::STATE_STREAMING;

Expand All @@ -18,22 +19,24 @@ public function __construct( $file_path, $chunk_size = 8096 ) {
$this->chunk_size = $chunk_size;
}

/**
* Really these are just `tell()` and `seek()` operations, only the state is more
* involved than a simple offset. Hmm.
*/
public function pause(): array|bool {
return array(
'offset_in_file' => $this->offset_in_file,
);
public function tell(): int {
// Save the previous offset, not the current one.
// This way, after resuming, the next read will yield the same $output_bytes
// as we have now.
return $this->offset_in_file - $this->last_chunk_size;
}

public function resume( $paused_state ): bool {
public function seek( $offset_in_file ): bool {
if ( ! is_int( $offset_in_file ) ) {
_doing_it_wrong( __METHOD__, 'Cannot set a file reader cursor to a non-integer offset.', '1.0.0' );
return false;
}
if ( $this->file_pointer ) {
_doing_it_wrong( __METHOD__, 'Cannot resume a file reader that is already initialized.', '1.0.0' );
_doing_it_wrong( __METHOD__, 'Cannot set a file reader cursor on a file reader that is already initialized.', '1.0.0' );
return false;
}
$this->offset_in_file = $paused_state['offset_in_file'];
$this->offset_in_file = $offset_in_file;
$this->last_chunk_size = 0;
return true;
}

Expand All @@ -50,7 +53,8 @@ public function get_last_error(): string|null {
}

public function next_bytes(): bool {
$this->output_bytes = '';
$this->output_bytes = '';
$this->last_chunk_size = 0;
if ( $this->last_error || $this->is_finished() ) {
return false;
}
Expand All @@ -66,7 +70,8 @@ public function next_bytes(): bool {
$this->state = static::STATE_FINISHED;
return false;
}
$this->offset_in_file += strlen( $bytes );
$this->last_chunk_size = strlen( $bytes );
$this->offset_in_file += $this->last_chunk_size;
$this->output_bytes .= $bytes;
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,19 @@ public function __construct( $url ) {
$this->url = $url;
}

public function tell(): int {
return $this->bytes_already_read + $this->skip_bytes;
}

public function seek( $offset_in_file ): bool {
if ( $this->request ) {
_doing_it_wrong( __METHOD__, 'Cannot set a remote file reader cursor on a remote file reader that is already initialized.', '1.0.0' );
return false;
}
$this->skip_bytes = $offset_in_file;
return true;
}

public function next_bytes(): bool {
if ( null === $this->request ) {
$this->request = new WordPress\AsyncHttp\Request(
Expand Down Expand Up @@ -90,21 +103,6 @@ public function get_bytes(): string|null {
return $this->current_chunk;
}

public function pause(): array|bool {
return array(
'offset_in_file' => $this->bytes_already_read + $this->skip_bytes,
);
}

public function resume( $paused_state ): bool {
if ( $this->request ) {
_doing_it_wrong( __METHOD__, 'Cannot resume a remote file reader that is already initialized.', '1.0.0' );
return false;
}
$this->skip_bytes = $paused_state['offset_in_file'];
return true;
}

public function is_finished(): bool {
return $this->is_finished;
}
Expand Down
Loading
Loading