和 XML parser 类似,有三个主要对象:
datasource
parser
targetObject
parser 在遍历 datasource 时,碰到不同的字符相当于产生不同的事件,并触发不同的动作;parser 会维护一些状态并更新 targetObject,用 stack 维护层级,栈顶是当前容器。
def parseJson(json):
    """Parse a JSON-like string into nested lists and dicts (lenient version).

    The text is scanned once, character by character.  A stack holds the
    containers that are currently open; the top of the stack is the container
    new items are added to.  The bottom of the stack is a synthetic root list
    that collects the top-level value(s).

    Conversions applied to scalar tokens and to dict keys:
      * a token made only of digits becomes an ``int``;
      * a token starting with a double quote loses its first and last
        character (the quotes);
      * any other bare word (``true``, ``null``, ``1.5``, ``-3`` ...) is kept
        as a string.

    Backslash escapes inside strings are not interpreted.  No validation is
    performed: malformed input may give a meaningless result or raise an
    ``IndexError`` / ``AttributeError`` / ``TypeError`` from the container
    operations.

    Args:
        json: the text to parse.

    Returns:
        The first top-level value, or ``None`` when the text holds no value.
    """
    whitespace = ' \t\r\n'
    root = []
    stack = [root]
    # One-element list so the nested add() can rebind the pending key
    # (works on Python 2, which has no ``nonlocal``).
    key = [None]
    token_start = -1      # index where the pending scalar token starts, or -1
    in_quote = False

    def convert(token):
        # Turn the raw text of a scalar token (or key) into a Python value.
        if token.isdigit():
            return int(token)
        if token[:1] == '"':
            return token[1:-1]
        return token

    def add(item, is_container):
        # Store item in the current container; a new container becomes current.
        if not is_container:
            item = convert(item)
        if key[0] is None:
            stack[-1].append(item)
        else:
            stack[-1][convert(key[0])] = item
            key[0] = None
        if is_container:
            stack.append(item)

    for i, ch in enumerate(json):
        if in_quote:
            # Everything inside quotes is plain text, including , : { } [ ].
            if ch == '"':
                in_quote = False
            continue
        if ch == '"':
            in_quote = True
            if token_start == -1:
                token_start = i
        elif ch in whitespace:
            continue
        elif ch in ',}]':
            # End of an item: emit the pending scalar, if there is one.
            if token_start != -1:
                add(json[token_start:i].strip(), False)
                token_start = -1
            if ch != ',':
                stack.pop()
        elif ch == ':':
            # The pending token is a dict key.  The token is cleared here so a
            # container value ("k": {} / "k": []) does not re-emit the key text.
            key[0] = json[token_start:i].strip() if token_start != -1 else ''
            token_start = -1
        elif ch in '{[':
            add([] if ch == '[' else {}, True)
        elif token_start == -1:
            token_start = i

    # A scalar that runs to the end of the input (e.g. a top-level "42").
    if token_start != -1:
        add(json[token_start:].strip(), False)
    return root[0] if root else None
更加精确,支持验证的版本
思路:维护一些状态,随着输入会改变这些状态
wordStart 和 word 用于分词:wordStart >= 0 and word == '' 意味着分词已经开始但还没结束;word != '' and wordStart == -1 意味着已经分出一个词,待使用
inQuote 代表是否处在引号的范围内,优先处理,因为引号内可以有特殊字符,且其特殊性被取消
expectNext,代表是否需要下一个项目,由“,”触发
算法分3大块
1)先处理引号相关的状态,因为引号内的语法字符被当作普通字符,强制分词,word的起始就是两边的引号
2)不在引号范围内的:
非语法字符(alphabet字符):这一部分主要是维护分词状态
语法字符:
“[” 或 “{”:产生一个新的容器,加到当前容器中,同时 expectNext = False,表示之前的 expectNext 已被满足
“:”:当前词 word 应该被当作一个 key;validation:key 必须是空,且 word 不为空
“,”:标志一个 item 结束,把当前词加到容器;如果当前词为空,且 expectNext 未满足(expectNext == True),则说明上一个 item 为空,不允许。同时标志后面必须再跟一个 item,expectNext = True
“]” 或 “}”:标志一个 item 结束,把当前词加到容器,也标志当前容器结束,stack.pop()
如何把item 加到容器?如果key为空,表示容器是个list,append,如果key有值,表示容器是dict,dict[key] = word。如果item本身是容器,则push到栈顶,成为当前容器。item是word:如果是带引号的,则去除引号作为str,如果是不带引号的,如果是数,转换成int
def parseJson(s):
    """Parse a JSON-like string into nested lists and dicts, with validation.

    The text is scanned once while a small set of state variables is updated:

      * ``stack``      - open containers; ``stack[0]`` is a synthetic root
                         list and ``stack[-1]`` is the current container.
      * ``word``       - a completed token that has not been consumed yet.
      * ``wordStart``  - start index of a token still being read, else -1.
      * ``key``        - pending dict key (one-element list so the nested
                         ``add`` can rebind it without ``nonlocal``).
      * ``inQuote``    - inside a quoted string; separators there are text.
      * ``expectNext`` - a comma was seen, so another item must follow.
      * ``justClosed`` - the previous token closed a container, so the next
                         token must be a comma or another closing bracket.

    Scalar tokens and keys are converted as follows: all-digit tokens become
    ``int``; quoted tokens lose their surrounding quotes; every other bare
    word (``true``, ``null``, ``1.5``, ``-3`` ...) is kept as a string.
    Backslash escapes inside strings are not interpreted.

    Args:
        s: the text to parse.

    Returns:
        The last top-level value, or ``None`` when the text holds no value.

    Raises:
        ValueError: (a subclass of ``Exception``) with the message
            ``'key error.'`` for a misplaced, missing or duplicated key,
            ``'empty entry.'`` for a missing item around a comma or after a
            colon, ``'missing separator.'`` for two adjacent values, and
            ``'unmatched.'`` for unbalanced brackets or quotes.
    """
    sep = '[]{}:, \t\r\n'
    stack = [[]]
    key = [None]
    word, wordStart = '', -1
    expectNext = inQuote = justClosed = False

    def convert(token):
        # Turn the raw text of a scalar token (or key) into a Python value.
        if token.isdigit():
            return int(token)
        if token[:1] == '"':
            return token[1:-1]
        return token

    def add(item, container):
        # Store item in the current container; a new container becomes current.
        if not container:
            item = convert(item)
        target = stack[-1]
        if isinstance(target, dict):
            if key[0] is None:
                raise ValueError('key error.')
            target[convert(key[0])] = item
            key[0] = None
        else:
            if key[0] is not None:
                raise ValueError('key error.')
            target.append(item)
        if container:
            stack.append(item)

    for i, ch in enumerate(s):
        if inQuote:
            if ch == '"':
                if word != '' or justClosed:
                    raise ValueError('missing separator.')
                # The token keeps its quotes so convert() can recognise it.
                inQuote, word, wordStart = False, s[wordStart:i + 1], -1
        elif ch == '"':
            if wordStart != -1:
                raise ValueError('missing separator.')
            inQuote, wordStart = True, i
        elif ch not in sep:
            if wordStart == -1:
                wordStart = i
            if i == len(s) - 1 or s[i + 1] in sep:
                if word != '' or justClosed:
                    raise ValueError('missing separator.')
                word, wordStart = s[wordStart:i + 1], -1
        elif ch == ':':
            if key[0] is not None or word == '':
                raise ValueError('key error.')
            key[0], word = word, ''
        elif ch == ',':
            if word != '':
                add(word, False)
                word = ''
            elif not justClosed:
                # Nothing precedes this comma: "[,1]" or "[1,,2]".
                raise ValueError('empty entry.')
            expectNext, justClosed = True, False
        elif ch in '{[':
            if word != '' or justClosed:
                raise ValueError('missing separator.')
            add([] if ch == '[' else {}, True)
            expectNext = False
        elif ch in '}]':
            if word != '':
                add(word, False)
                word = ''
            elif expectNext or key[0] is not None:
                # Trailing comma ("[1,]") or key without a value ('{"a":}').
                raise ValueError('empty entry.')
            # The root list is never closed, and the bracket must match the
            # type of the container being closed.
            if len(stack) == 1 or isinstance(stack[-1], dict) != (ch == '}'):
                raise ValueError('unmatched.')
            stack.pop()
            expectNext, justClosed = False, True
        # Any other separator is whitespace and needs no action.

    if inQuote or len(stack) != 1:
        raise ValueError('unmatched.')
    if word != '':
        # A scalar that runs to the end of the input (e.g. a top-level "42").
        add(word, False)
    elif expectNext or key[0] is not None:
        raise ValueError('empty entry.')
    return stack[0][-1] if stack[0] else None
递归版本的
class JsonParser:
    """Recursive-descent parser for a JSON-like text format.

    Supported values are lists ``[...]``, dicts ``{...}``, double-quoted
    strings (returned without the quotes; backslash escapes are not
    interpreted) and bare words.  A bare word made only of digits becomes an
    ``int``; any other bare word (``true``, ``null``, ``1.5`` ...) is
    returned as a string.

    The read position is kept on the instance, so one instance must not be
    used for two parses at the same time; sequential reuse is fine because
    ``parse`` resets the position.
    """

    _WHITESPACE = ' \t\r\n'
    # Characters that terminate a bare word.
    _WORD_END = ',:]}'

    def __init__(self):
        self.__i = 0

    def parse(self, json):
        """Parse the first value found in ``json`` and return it.

        Parsing stops after the first complete value; any text following it
        is ignored.

        Raises:
            ValueError: (a subclass of ``Exception``) when the text is empty
                or malformed; the message names the problem.
        """
        # Start from the beginning on every call so the instance is reusable.
        self.__i = 0
        return self._parse_value(json)

    def _skip_whitespace(self, json):
        # Advance the read position past spaces, tabs and line breaks.
        while self.__i < len(json) and json[self.__i] in self._WHITESPACE:
            self.__i += 1

    def _parse_value(self, json):
        # Parse one value; afterwards the position is on the next delimiter
        # (or at the end of the text).
        self._skip_whitespace(json)
        if self.__i == len(json):
            raise ValueError('empty entry.')
        ch = json[self.__i]
        if ch == '[':
            value = self._parse_list(json)
        elif ch == '{':
            value = self._parse_dict(json)
        elif ch == '"':
            value = self._parse_string(json)
        else:
            return self._parse_word(json)
        # Allow whitespace between a closing quote/bracket and the delimiter.
        self._skip_whitespace(json)
        return value

    def _parse_string(self, json):
        # Parse a double-quoted string; the position starts on the opening quote.
        self.__i += 1
        start = self.__i
        while self.__i < len(json) and json[self.__i] != '"':
            self.__i += 1
        if self.__i == len(json):
            raise ValueError('unmatched quote.')
        word = json[start:self.__i]
        self.__i += 1  # step over the closing quote
        return word

    def _parse_word(self, json):
        # Parse an unquoted token that runs up to the next delimiter.
        start = self.__i
        while self.__i < len(json) and json[self.__i] not in self._WORD_END:
            self.__i += 1
        word = json[start:self.__i].strip()
        if word == '':
            # A delimiter where a value was required, e.g. "[1,]" or '{"a":}'.
            raise ValueError('empty entry.')
        # Check the whole token: "1.5" or "12abc" must not reach int().
        return int(word) if word.isdigit() else word

    def _parse_list(self, json):
        # Parse "[item, item, ...]"; the position starts on the "[".
        result = []
        self.__i += 1
        self._skip_whitespace(json)
        if self.__i == len(json):
            raise ValueError('invaild list end.')
        if json[self.__i] == ']':
            self.__i += 1
            return result
        while True:
            result.append(self._parse_value(json))
            if self.__i == len(json) or json[self.__i] not in ',]':
                raise ValueError('invaild list item end.')
            if json[self.__i] == ']':
                break
            self.__i += 1  # step over the comma; another item must follow
        self.__i += 1
        return result

    def _parse_dict(self, json):
        # Parse "{key: value, ...}"; the position starts on the "{".
        result = {}
        self.__i += 1
        self._skip_whitespace(json)
        if self.__i == len(json):
            raise ValueError('invalid dict end.')
        if json[self.__i] == '}':
            self.__i += 1
            return result
        while True:
            key = self._parse_value(json)
            if self.__i == len(json) or json[self.__i] != ':':
                raise ValueError('invalid dict key end.')
            self.__i += 1
            value = self._parse_value(json)
            if self.__i == len(json) or json[self.__i] not in ',}':
                raise ValueError('invaild dict item end.')
            result[key] = value
            if json[self.__i] == '}':
                break
            self.__i += 1  # step over the comma; another pair must follow
        self.__i += 1
        return result