<?php
/**
 * WordPress eXtended RSS file parser implementations
 *
 * @package WordPress
 * @subpackage Importer
 */

/**
 * Streaming WXR (WordPress eXtended RSS) parser built on PHP's XML extension.
 *
 * The export file is read in small chunks and pushed through the XML parser.
 * Parsed records are collected in the public $authors, $posts, $category,
 * $tag and $term arrays. Each time the number of completed <item> elements
 * reaches the batch size given to parse(), the 'chunk_parser_continue' filter
 * is applied with the parser instance so a consumer can process the posts
 * gathered so far and decide whether parsing should go on.
 */
class WXR_Parser_XML {
	/**
	 * Leaf elements whose text is stored on the current record, keyed by the
	 * tag name without its three-character "wp:" prefix.
	 */
	public $wp_tags = array(
		'wp:post_id', 'wp:post_date', 'wp:post_date_gmt', 'wp:comment_status', 'wp:ping_status', 'wp:attachment_url',
		'wp:status', 'wp:post_name', 'wp:post_parent', 'wp:menu_order', 'wp:post_type', 'wp:post_password',
		'wp:is_sticky', 'wp:term_id', 'wp:category_nicename', 'wp:category_parent', 'wp:cat_name', 'wp:category_description',
		'wp:tag_slug', 'wp:tag_name', 'wp:tag_description', 'wp:term_taxonomy', 'wp:term_parent',
		'wp:term_name', 'wp:term_description', 'wp:author_id', 'wp:author_login', 'wp:author_email', 'wp:author_display_name',
		'wp:author_first_name', 'wp:author_last_name',
	);

	/**
	 * Leaf elements whose text is stored on the current sub-record (comment),
	 * keyed by the tag name without its "wp:" prefix.
	 */
	public $wp_sub_tags = array(
		'wp:comment_id', 'wp:comment_author', 'wp:comment_author_email', 'wp:comment_author_url',
		'wp:comment_author_IP', 'wp:comment_date', 'wp:comment_date_gmt', 'wp:comment_content',
		'wp:comment_approved', 'wp:comment_type', 'wp:comment_parent', 'wp:comment_user_id',
	);

	// Stream handle: a resource while open, false if fopen() failed, null once dispose() has run.
	public $handle = false;
	// XML parser instance; null once dispose() has run.
	public $xml = null;

	// Header values taken from <wp:wxr_version> and <wp:base_site_url>.
	public $wxr_version = false;
	public $base_url = '';

	// Parser state shared between the element and character-data handlers.
	public $in_post = false;    // Set to true when the first <item> opens (never reset).
	public $in_tag = false;     // Key under which the current leaf's text goes into $data.
	public $in_sub_tag = false; // Key under which the current leaf's text goes into $sub_data.
	public $cdata = false;      // Text accumulated since the last closing tag.
	public $data = array();     // Record currently being built (post, term, author, ...).
	public $sub_data = array(); // Nested record currently being built (comment, post meta, item category).

	// Parsed results.
	public $authors = array();
	public $posts = array();
	public $term = array();
	public $category = array();
	public $tag = array();

	// Number of <item> elements completed since the last batch notification.
	private $postCount = 0;

	/**
	 * Opens the export file and prepares the XML parser.
	 *
	 * A failed fopen() is not reported here; it leaves $handle === false and
	 * is surfaced as a WP_Error by parse().
	 *
	 * @param string $file Path of the WXR file to read.
	 */
	function __construct( $file ) {
		$this->handle = fopen( $file, 'r' );

		$this->xml = xml_parser_create( 'UTF-8' );
		xml_parser_set_option( $this->xml, XML_OPTION_SKIP_WHITE, 1 );
		xml_parser_set_option( $this->xml, XML_OPTION_CASE_FOLDING, 0 );

		// Callable arrays bind the handlers to this instance without
		// xml_set_object(), which is deprecated in recent PHP versions.
		xml_set_character_data_handler( $this->xml, array( $this, 'cdata' ) );
		xml_set_element_handler( $this->xml, array( $this, 'tag_open' ), array( $this, 'tag_close' ) );
	}

	/**
	 * Runs the parser and returns the header information gathered so far.
	 *
	 * @return array|WP_Error Array with the keys 'authors', 'categories', 'tags',
	 *                        'terms', 'base_url' and 'version', or a WP_Error when
	 *                        parsing fails or no valid WXR version was found.
	 */
	function get_header() {
		$result = $this->parse( 1 );

		if ( is_wp_error( $result ) ) {
			return $result;
		}

		if ( ! preg_match( '/^\d+\.\d+$/', (string) $this->wxr_version ) ) {
			return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );
		}

		return array(
			'authors'    => $this->authors,
			'categories' => $this->category,
			'tags'       => $this->tag,
			'terms'      => $this->term,
			'base_url'   => $this->base_url,
			'version'    => $this->wxr_version,
		);
	}

	/**
	 * Reads the file chunk by chunk and feeds it to the XML parser.
	 *
	 * Whenever at least $post_count items have been completed, the
	 * 'chunk_parser_continue' filter is applied with array( true, &$this ).
	 * A falsy filter result stops parsing immediately (the method returns null);
	 * otherwise the item counter is reset and reading continues. After the loop
	 * the filter is applied once more if any posts are present in $posts.
	 *
	 * If the stream was already closed by dispose(), nothing is read and only
	 * the trailing filter call is made.
	 *
	 * @param int $post_count Batch size: number of completed items between filter calls.
	 * @return null|WP_Error Null on success or early stop, WP_Error when the file
	 *                       could not be opened/read or the XML is malformed.
	 */
	function parse( $post_count ) {
		if ( false === $this->handle ) {
			return new WP_Error( 'WXR_file_error', 'There was an error when reading this WXR file' );
		}

		// NOTE(review): presumably kept small so the batch check below runs at a
		// fine granularity; the original source mentioned 16384 as an alternative.
		$chunk_size = 256;

		if ( is_resource( $this->handle ) ) {
			while ( ! feof( $this->handle ) ) {
				$data = fread( $this->handle, $chunk_size );
				if ( false === $data ) {
					return new WP_Error( 'WXR_file_error', 'There was an error when reading this WXR file' );
				}

				// feof() is evaluated after the read so that the last chunk is
				// still parsed and flagged as final, letting the XML parser
				// report an unterminated document.
				if ( ! xml_parse( $this->xml, $data, feof( $this->handle ) ) ) {
					$current_line   = xml_get_current_line_number( $this->xml );
					$current_column = xml_get_current_column_number( $this->xml );
					$error_code     = xml_get_error_code( $this->xml );
					$error_string   = xml_error_string( $error_code );
					return new WP_Error( 'XML_parse_error', 'There was an error when reading this WXR file', array( $current_line, $current_column, $error_string ) );
				}

				// ">=" rather than "==": several items may close within a single
				// chunk and push the counter past the batch size.
				if ( $post_count > 0 && $this->postCount >= $post_count ) {
					$continue = apply_filters_ref_array( 'chunk_parser_continue', array( true, &$this ) );
					if ( ! $continue ) {
						return;
					}
					$this->postCount = 0;
				}
			}
		}

		if ( ! empty( $this->posts ) ) {
			apply_filters_ref_array( 'chunk_parser_continue', array( true, &$this ) );
		}
	}

	/**
	 * Explicitly releases the file handle and the XML parser.
	 *
	 * Safe to call more than once.
	 */
	function dispose() {
		if ( is_resource( $this->handle ) ) {
			fclose( $this->handle );
		}
		$this->handle = null;

		if ( $this->xml ) {
			// xml_parser_free() has no effect from PHP 8.0 on, where the parser
			// is an object released when the last reference goes away.
			if ( PHP_VERSION_ID < 80000 ) {
				xml_parser_free( $this->xml );
			}
			$this->xml = null;
		}
	}

	/**
	 * Element-start handler: decides where the text of the element will be stored.
	 *
	 * @param mixed  $parse XML parser instance (unused).
	 * @param string $tag   Element name as it appears in the file (case folding is off).
	 * @param array  $attr  Element attributes.
	 */
	function tag_open( $parse, $tag, $attr ) {
		if ( in_array( $tag, $this->wp_tags, true ) ) {
			$this->in_tag = substr( $tag, 3 );
			return;
		}

		if ( in_array( $tag, $this->wp_sub_tags, true ) ) {
			$this->in_sub_tag = substr( $tag, 3 );
			return;
		}

		switch ( $tag ) {
			case 'category':
				if ( isset( $attr['domain'], $attr['nicename'] ) ) {
					$this->sub_data['domain'] = $attr['domain'];
					$this->sub_data['slug']   = $attr['nicename'];
				}
				break;
			case 'item':
				$this->in_post = true;
				// No break: falls through, so opening an item also primes
				// in_tag with 'post_title'.
			case 'title':
				if ( $this->in_post ) {
					$this->in_tag = 'post_title';
				}
				break;
			case 'guid':
				$this->in_tag = 'guid';
				break;
			case 'dc:creator':
				$this->in_tag = 'post_author';
				break;
			case 'content:encoded':
				$this->in_tag = 'post_content';
				break;
			case 'excerpt:encoded':
				$this->in_tag = 'post_excerpt';
				break;

			case 'wp:term_slug':
				$this->in_tag = 'slug';
				break;
			case 'wp:meta_key':
				$this->in_sub_tag = 'key';
				break;
			case 'wp:meta_value':
				$this->in_sub_tag = 'value';
				break;
		}
	}

	/**
	 * Character-data handler: accumulates element text into $cdata.
	 *
	 * The XML parser may deliver the text of one element in several pieces
	 * (all the more so because the file is fed in small chunks). While a
	 * tracked element is open the pieces are appended verbatim, so whitespace
	 * at piece boundaries is kept; otherwise each piece is trimmed.
	 * Whitespace-only pieces are always ignored.
	 *
	 * @param mixed  $parser XML parser instance (unused).
	 * @param string $cdata  Piece of character data.
	 */
	function cdata( $parser, $cdata ) {
		// Compare against '' instead of testing truthiness so that the
		// literal value "0" is not discarded.
		if ( '' === trim( $cdata ) ) {
			return;
		}

		if ( false !== $this->in_tag || false !== $this->in_sub_tag ) {
			$this->cdata .= $cdata;
		} else {
			$this->cdata .= trim( $cdata );
		}
	}

	/**
	 * Element-end handler: stores the accumulated text or finalises a record.
	 *
	 * @param mixed  $parser XML parser instance (unused).
	 * @param string $tag    Element name.
	 */
	function tag_close( $parser, $tag ) {
		switch ( $tag ) {
			case 'wp:comment':
				if ( ! empty( $this->sub_data ) ) {
					$this->data['comments'][] = $this->sub_data;
				}
				$this->sub_data = array();
				break;
			case 'category':
				// Only categories that carried domain/nicename attributes have sub_data.
				if ( ! empty( $this->sub_data ) ) {
					$this->sub_data['name'] = $this->cdata;
					$this->data['terms'][]  = $this->sub_data;
				}
				$this->sub_data = array();
				break;
			case 'wp:postmeta':
				if ( ! empty( $this->sub_data ) ) {
					$this->data['postmeta'][] = $this->sub_data;
				}
				$this->sub_data = array();
				break;
			case 'item':
				$this->posts[] = $this->data;
				$this->data    = array();
				$this->postCount++;
				break;
			case 'wp:category':
			case 'wp:tag':
			case 'wp:term':
				// 'wp:category' => $this->category, 'wp:tag' => $this->tag, 'wp:term' => $this->term.
				$n = substr( $tag, 3 );
				array_push( $this->$n, $this->data );
				$this->data = array();
				break;
			case 'wp:author':
				if ( ! empty( $this->data['author_login'] ) ) {
					$this->authors[ $this->data['author_login'] ] = $this->data;
				}
				$this->data = array();
				break;
			case 'wp:base_site_url':
				$this->base_url = $this->cdata;
				break;
			case 'wp:wxr_version':
				$this->wxr_version = $this->cdata;
				break;

			default:
				// $cdata is false when the element had no text; a strict check
				// keeps a literal "0" instead of turning it into ''.
				$value = ( false !== $this->cdata ) ? $this->cdata : '';
				if ( $this->in_sub_tag ) {
					$this->sub_data[ $this->in_sub_tag ] = $value;
					$this->in_sub_tag                    = false;
				} elseif ( $this->in_tag ) {
					$this->data[ $this->in_tag ] = $value;
					$this->in_tag                = false;
				}
		}

		$this->cdata = false;
	}
}
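/*
 * Disabled ad-hoc manual test harness, kept for reference only (never executed).
 * Note: memory_get_usage() reports bytes, so the "kilobytes" label in the final echo is inaccurate.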
function parse_header($file) {
    $parser = new WXR_Parser_XML($file);
    add_filter('chunk_parser_continue', create_function('$a', 'return false;'), 0, 2);
    $parser->parse(1);
    $parser->dispose();
    
    return $parser->get_header();
}
require '../../../wp-load.php';

$mem_usage = memory_get_usage(true);
//$parser = new WXR_Parser_XML('E:\Tools\xamp\htdocs\wp\wp-content\plugins\wp-large-upload\big-export.xml');
$parser = new WXR_Parser_XML('D:\Data\Mis documentos\Descargas\test.wordpress.2011-05-28 (1).xml');

print_r(parse_header('D:\Data\Mis documentos\Descargas\test.wordpress.2011-05-28 (1).xml'));
//echo 'demo'; print_r($parser->parse(8));
$parser->dispose();
$mem_usage = memory_get_usage(true) - $mem_usage;

 echo round($mem_usage,2)." kilobytes";*/