将CDATA标签给过滤掉,它总是能够知道如何解析

作者:美狮美高梅官方网站

发现原因是 XML 数据中出现了形如 `<![CDATA[...]]]>` 的内容, 即结尾处有连续的 3 个 `]`, 造成 CDATA 解析函数不能处理。

class JSON
{
var $at   = 0;
var $ch   = '';
var $text = '';

对于 `<tag attr="value">test</tag>` 这种带属性的 XML 标签, 要使用 DOMElement 的 getAttribute 函数来读取属性值。

$this->error('Syntax error');
}

当时在网上找了一个 CDATA 的转换器, 修改之后用它将 CDATA 标签过滤掉, 代码如下:
 // States:
        //
        //     'out'
        //     '        //     '        //     '        //     '        //     '        //     '        //     '        //     'in'
        //     ']'
        //     ']]'
        //
        // (Yes, the states a represented by strings.)
        //
        $state = 'out';
        $a = str_split($xml);
        $new_xml = '';
        foreach ($a AS $k => $v) {
            // Deal with "state".
            switch ( $state ) {
                case 'out':
                    if ( '                        $state = $v;
                    } else {
                        $new_xml .= $v;
                    }
                break;
                case '                    if ( '!' == $v  ) {
                        $state = $state . $v;
                    } else {
                        $new_xml .= $state . $v;
                        $state = 'out';
                    }
                break;
                 case '                    if ( '[' == $v  ) {
                        $state = $state . $v;
                    } else {
                        $new_xml .= $state . $v;
                        $state = 'out';
                    }
                break;
                case '                    if ( 'C' == $v  ) {
                        $state = $state . $v;
                    } else {
                        $new_xml .= $state . $v;
                        $state = 'out';
                    }
                break;
                case '                    if ( 'D' == $v  ) {
                        $state = $state . $v;
                    } else {
                        $new_xml .= $state . $v;
                        $state = 'out';
                    }
                break;
                case '                    if ( 'A' == $v  ) {
                        $state = $state . $v;
                    } else {
                        $new_xml .= $state . $v;
                        $state = 'out';
                    }
                break;
                case '                    if ( 'T' == $v  ) {
                        $state = $state . $v;
                    } else {
                        $new_xml .= $state . $v;
                        $state = 'out';
                    }
                break;
                case '                    if ( 'A' == $v  ) {
                        $state = $state . $v;
                    } else {
                        $new_xml .= $state . $v;
                        $state = 'out';
                    }
                break;
                case '                    if ( '[' == $v  ) {
                        $cdata = '';
                        $state = 'in';
                    } else {
                        $new_xml .= $state . $v;
                        $state = 'out';
                    }
                break;
                case 'in':
                    if ( ']' == $v ) {
                        $state = $v;
                    } else {
                        $cdata .= $v;
                    }
                break;
                case ']':
                    if (  ']' == $v  ) {
                        $state = $state . $v;
                    } else {
                        $cdata .= $state . $v;
                        $state = 'in';
                    }
                break;
                case ']]':
   if (  '>' == $v  ) {
    $new_xml .= htmlentities($cdata);
#       $new_xml.= $cdata;
//                        $new_xml .= str_replace('>','>',
  //                                  str_replace('>','    //                                str_replace('"','"',
      //                              str_replace('&','&',
        //                            $cdata))));
                        $state = 'out';
                    } else {
                        $cdata .= $state . $v;
                        $state = 'in';
                    }
                break;
            } // switch
        }
        //
        // Return.
        //
            return $new_xml;

/**
 * Recursively escape special characters in a value using addslashes().
 *
 * Arrays are walked element by element (nested arrays included); any other
 * non-empty value is passed to addslashes(). Values that empty() considers
 * empty are returned untouched.
 *
 * @access  public
 * @param   mix     $value  value or (nested) array of values to escape
 *
 * @return  mix             the escaped value, same shape as the input
 */
function addslashes_deep($value)
{
    // Nothing to escape in '', 0, '0', null, false or array().
    if (empty($value)) {
        return $value;
    }

    if (is_array($value)) {
        // Recurse into every element; keys are preserved by array_map().
        return array_map('addslashes_deep', $value);
    }

    return addslashes($value);
}

PHP 的 DOM 扩展包含 DOMElement、DOMDocument、DOMNodeList、DOMNode 几个类。

if ($this->ch == '{')
{
$this->next();

DOMNode 有 nodeValue、nodeType、nodeName 这几个成员属性。

return $a;

所以决定还是将这段解析代码换成 DOM XML; DOM 本身的处理还是比较简单的。

function decode($text,$type=0) // 默认type=0返回obj,type=1返回array
{
if (empty($text))
{
return '';
}
elseif (!is_string($text))
{
return false;
}

而且这个问题很难修正, 你不知道下次会不会有4, 5个]出现。

return $o;
}
elseif ($this->ch != ',')
{
break;
}

首先用 loadXML 将 string 转化为 DOMDocument 对象, 再用 getElementsByTagName 得到 DOMNodeList 对象, 再使用 ->item(0) 取出 DOMNode, 然后就可以使用 nodeValue、nodeType、nodeName 这三个属性了。

print_r($cmt);

最近发现,总是有alert发出来, 说是simplexml解析出错。

}

case 'r':
$s .= ' ';
break;

if ($this->ch != ':')
{
break;
}

return null;
}
break;
}

$s .= chr($u);
break;
case ''':
$s .= ''';
break;
default:
$s .= $this->ch;
}
}
else
{
$s .= $this->ch;
}
}
}

case 'object':
foreach (get_object_vars($arg) AS $i => $v)
{
$v = $this->encode($v);

}
elseif ($this->ch != ',')
{
break;
}

/**
* 将对象成员变量或者数组的特殊字符进行转义
*
* @access   public
* @param    mix        $obj      对象或者数组
* @author   Xuan Yan
*
* @return   mix                  对象或者数组
*/
function addslashes_deep_obj($obj)
{
if (is_object($obj) == true)
{
foreach ($obj AS $key => $val)
{
$obj->$key =$this-> addslashes_deep($val);
}
}
else
{
$obj = addslashes_deep($obj);
}

if ($this->ch == ']')
{
$this->next();

while ($this->next() && $this->ch >= '0' && $this->ch <= '9')
{
$n .= $this->ch;
}
}

if ($this->ch == '}')
{
$this->next();

return addslashes_deep_obj($result);
}

ecshop解析json类

return $o;
}

/**
 * Recursively converts an object (and any nested objects) into arrays.
 *
 * Objects are expanded with get_object_vars(); arrays are walked as they are.
 * Every element that is itself an array or object is converted recursively,
 * and keys are preserved.
 *
 * @access private
 * @param  mixed $obj  object or array to convert; any other value is
 *                     returned unchanged
 *
 * @return array       the converted structure (an empty array for an empty
 *                     array or an object without accessible properties)
 */
function object_to_array($obj)
{
    $_arr = is_object($obj) ? get_object_vars($obj) : $obj;

    // A scalar or null has nothing to iterate over; passing it to foreach
    // would raise a warning and the function would return an undefined
    // variable, so hand it back as is.
    if (!is_array($_arr))
    {
        return $_arr;
    }

    // Initialise the result so empty input yields array() instead of an
    // undefined variable.
    $arr = array();
    foreach ($_arr as $key => $val)
    {
        $arr[$key] = (is_array($val) || is_object($val)) ? $this->object_to_array($val) : $val;
    }

    return $arr;
}

case '[':
return $this->arr();

while ($this->ch)
{
$k = $this->str();

if (strlen($s) > 0)
{
$s .= ',';
}
$s .= $this->encode($i) . ':' . $v;
}

$result = empty($type) ? $return : $this->object_to_array($return);

while ($this->ch)
{
$k = $this->str();

$returnValue = '{' . $s . '}';
}
break;

/**
 * Raises a PHP user error describing a failure at the current position.
 *
 * The message is the given text followed by the current offset ($this->at)
 * and the full input text ($this->text).
 *
 * @access   private
 * @param    string    $m    error message
 *
 * @return   void
 */
function error($m)
{
    $message = $m . ' at offset ' . $this->at . ': ' . $this->text;

    trigger_error($message, E_USER_ERROR);
}

/**
* handles objects
*
* @access  public
*
* @return  void
*/
function obj()
{
$k = '';
$o = new StdClass();

case 'u':
$u = 0;

/**
* handles strings
*
* @access  private
*
* @return  void
*/
function str()
{
$i = '';
$s = '';
$t = '';
$u = '';

default:
return ($this->ch >= '0' && $this->ch <= '9') ? $this->num() : $this->word();
}
}

return $a;
}

/**
* returns the next character of a JSON string
*
* @access  private
*
* @return  string
*/
function next()
{
$this->ch = !isset($this->text{$this->at}) ? '' : $this->text{$this->at};
$this->at++;

case 'n':
$s .= ' ';
break;

if ($_force && EC_CHARSET == 'utf-8' && function_exists('json_encode'))
{
return json_encode($arg);
}

if ($this->next() == 'r' && $this->next() == 'u' && $this->next() == 'e')
{
$this->next();

if ($this->ch != ':')
{
break;
}

/**
 * Recursively remove escaping backslashes from a value using stripslashes().
 *
 * Arrays are processed element by element (nested arrays included); any
 * other non-empty value is passed to stripslashes(). Values that empty()
 * considers empty are returned untouched.
 *
 * @access  public
 * @param   mix     $value  value or (nested) array of values to unescape
 *
 * @return  mix             the unescaped value, same shape as the input
 */
function stripslashes_deep($value)
{
    // '', 0, '0', null, false and array() carry no escaping to remove.
    if (empty($value)) {
        return $value;
    }

    if (!is_array($value)) {
        return stripslashes($value);
    }

    // Recurse into every element; keys are preserved by array_map().
    return array_map('stripslashes_deep', $value);
}

$returnValue = '[' . $s . ']';
}
else
{
foreach ($arg AS $i => $v)
{
if (strlen($s) > 0)
{
$s .= ',';
}
$s .= $this->encode($i) . ':' . $this->encode($arg[$i]);
}

正如大多数流行的 Web 服务(如 Twitter)通过开放 API 来提供数据一样, 它总是能够知道如何解析 API 数据的各种传送格式, 包括 JSON、XML 等等。

if (!is_numeric($v))
{
$this->error('Bad number');
}
else
{
return $v;
}
}

$this->error('Bad string');
}

case '<':
return $this->assoc();

$cmt = $json->json_str_iconv($string);  //字符转码

case 'string':
$returnValue = '"' . strtr($arg, array(
" "   => 'r',    " "   => 'n',    " "   => 't',     " "   => 'b',
" "   => 'f',    ''   => '\',   '"'    => '"',
"" => 'u0000', " " => 'u0001', " " => 'u0002', " " => 'u0003',
" " => 'u0004', " " => 'u0005', " " => 'u0006', " " => 'u0007',
" " => ' ',     " " => 'u000b', " " => ' ',     " " => 'u000e',
" " => 'u000f', " " => 'u0010', " " => 'u0011', " " => 'u0012',
" " => 'u0013', " " => 'u0014', " " => 'u0015', " " => 'u0016',
" " => 'u0017', " " => 'u0018', " " => 'u0019', " " => 'u001a',
" " => 'u001b', " " => 'u001c', " " => 'u001d', " " => 'u001e',
" " => 'u001f'
)) . '"';
break;

$v += $n;

/**
* handles objects
*
* @access  public
*
* @return  void
*/
function assoc()
{
$k = '';
$a = array();

case '-':
return $this->num();

$this->next();
}
}

case 'boolean':
$returnValue = $arg?'true':'false';
break;

return $s;
}
elseif ($this->ch == '')
{
switch ($this->next())
{
case 'b':
$s .= ' ';
break;

return $obj;
}

本文由美狮美高梅官方网站发布,转载请注明来源

关键词: