import hashlib
import os
import sys
import xml.etree.ElementTree as ET
import zipfile


class AnalyseMindMap:
    def __init__(self, file_path, mark):
        self.file_path = file_path
        self.context = ""
        # mark is the character used to bullet nested topics in the generated
        # markup ("#" or "*"); anything else falls back to a plain space.
        if mark in ("#", "*"):
            self.mark = mark
        else:
            self.mark = " "

    # Parse an XMind file: hash it, unpack the archive, then parse its content.xml.
    def analyse_xmind(self):
        file_name = os.path.basename(self.file_path)
        if not os.path.isfile(self.file_path):
            return "{} does not exist".format(file_name)
        base_dir = os.path.dirname(self.file_path)
        # Use the MD5 of the file as the name of the extraction directory, so the
        # same file is only ever unpacked into one place.
        m = hashlib.md5()
        with open(self.file_path, 'rb') as f:
            read_bytes = f.read(1024)
            while read_bytes != b'':
                m.update(read_bytes)
                read_bytes = f.read(1024)
        md5value = m.hexdigest()
        extract_dir = os.path.join(base_dir, md5value)
        if os.path.isdir(extract_dir):
            print('This file has already been extracted:', md5value)
        else:
            # An .xmind file is a zip archive; unpack it next to the source file.
            with zipfile.ZipFile(self.file_path, 'r') as file_zip:
                file_zip.extractall(extract_dir)
        xml_file = os.path.join(extract_dir, 'content.xml')
        return self.analyse_xml(xml_file)
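
    # A classic XMind workbook is a zip archive whose content.xml describes the
    # map as namespaced XML. The element paths used in analyse_xml below assume
    # a structure roughly like this sketch (attributes and namespaces omitted):
    #   <sheet>
    #     <topic>
    #       <title>...</title>
    #       <notes><plain>...</plain></notes>
    #       <labels><label>...</label></labels>
    #       <children><topics><topic>...</topic></topics></children>
    #     </topic>
    #   </sheet>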

    # Parse the content.xml extracted from an XMind workbook into wiki-style markup.
    def analyse_xml(self, xml_file):
        try:
            tree = ET.parse(xml_file)
            # Get the root element.
            root = tree.getroot()
        except Exception:  # Catches any parse error (SystemExit from sys.exit() is not an Exception).
            print("Failed to parse {}".format(xml_file))
            sys.exit()
        # Element tags carry the XMind namespace, e.g. '{urn:...}topic'; recover the
        # '{namespace}' prefix from the root tag so search paths can be built below.
        pre_tag = '{' + root.tag.split('{')[1].split('}')[0] + '}'
        title_path = pre_tag + 'sheet/' + pre_tag + 'topic/' + pre_tag + 'title'
        print("h1.", root.find(title_path).text)
        self.context = "\nh1." + root.find(title_path).text + "\n"
        plain_path = (pre_tag + 'sheet/' + pre_tag + 'topic/' + pre_tag + 'notes/'
                      + pre_tag + 'plain')
        if root.find(plain_path) is not None:
            print("Note: " + root.find(plain_path).text)
            self.context += "Note: " + root.find(plain_path).text + "\n\n"
        children_path = pre_tag + 'sheet/' + pre_tag + 'topic/' + pre_tag + 'children'
        num = 1  # Tracks the nesting depth.
        for first_topic in root.findall(children_path):
            self.recursive_xml(first_topic, pre_tag, num)
        return self.context

    # Recursively walk child topics and collect their titles, notes and labels.
    def recursive_xml(self, root, pre_tag, num):
        # A <children> element can hold several <topics> groups, so loop over them.
        topics_path = pre_tag + 'topics'
        for topics in root.findall(topics_path):
            topic_path = pre_tag + 'topic'
            for topic in topics.findall(topic_path):
                title_path = pre_tag + 'title'
                if num > 1:
                    print(self.mark * (num - 1), topic.find(title_path).text)
                    self.context += self.mark * (num - 1) + " " + topic.find(title_path).text + "\n"
                else:
                    print("h3.", topic.find(title_path).text)
                    self.context += "\nh3." + topic.find(title_path).text + "\n\n"
                plain_path = pre_tag + 'notes/' + pre_tag + 'plain'
                if topic.find(plain_path) is not None:
                    print("Note: " + topic.find(plain_path).text)
                    self.context += "Note: " + topic.find(plain_path).text + "\n\n"
                label_path = pre_tag + 'labels/' + pre_tag + 'label'
                if topic.find(label_path) is not None:
                    print("-->Label:", topic.find(label_path).text + "<--")
                    self.context += "-->Label:" + topic.find(label_path).text + "<--" + "\n\n"
                children_path = pre_tag + 'children'
                for new_topic in topic.findall(children_path):
                    self.recursive_xml(new_topic, pre_tag, num + 1)

    # Parse the XML of a FreeMind .mm file.
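    # A FreeMind .mm file is plain, un-namespaced XML. The lookups below assume a
    # structure roughly like this sketch (attributes other than TEXT omitted):
    #   <map>
    #     <node TEXT="...">
    #       <richcontent><html><body><p>...</p></body></html></richcontent>
    #       <node TEXT="...">...</node>
    #     </node>
    #   </map>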
    def analyse_mm_xml(self):
        file_name = os.path.basename(self.file_path)
        if not os.path.isfile(self.file_path):
            return "{} does not exist".format(file_name)
        try:
            tree = ET.parse(self.file_path)
            # Get the root element.
            root = tree.getroot()
        except Exception:  # Catches any parse error (SystemExit from sys.exit() is not an Exception).
            print("Failed to parse {}".format(self.file_path))
            sys.exit()
        node_path = "node"
        num = 1
        for node in root.findall(node_path):
            print("h1.", node.attrib['TEXT'])
            self.context += "\nh1." + node.attrib['TEXT'] + "\n"
            if node.find('richcontent') is not None:
                context_p = 'richcontent/html/body/p'
                note_text = node.find(context_p).text.replace(' ', '').replace('\n', '')
                print('Note:', note_text)
                self.context += 'Note:' + note_text + "\n\n"
            self.recursive_node(node, num)
        return self.context

    # Recursively walk child nodes and collect their text and notes.
    def recursive_node(self, root, num):
        node_path = 'node'
        richcontent_path = 'richcontent'
        for node in root.findall(node_path):
            if 'TEXT' in node.attrib:
                if num > 1:
                    print(self.mark * (num - 1), node.attrib['TEXT'])
                    self.context += self.mark * (num - 1) + ' ' + node.attrib['TEXT'] + "\n"
                else:
                    print("h3.", node.attrib['TEXT'])
                    self.context += "\nh3." + node.attrib['TEXT'] + "\n\n"
            if node.find(richcontent_path) is not None:
                context_p = 'richcontent/html/body/p'
                context_out = ''
                for p in node.findall(context_p):
                    context_out += p.text.replace(' ', '').replace('\n', '') + '\n'
                print('Note:', context_out)
                self.context += 'Note:' + context_out + "\n\n"
            if node.find(node_path) is not None:
                self.recursive_node(node, num + 1)
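
# A rough sketch of the markup both parsers emit, for a map whose root topic is
# "Login" with a child "Valid user" and a grandchild "Remember me" (topic names
# are made up for illustration; mark="#"):
#   h1.Login
#   h3.Valid user
#   # Remember me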


if __name__ == '__main__':
    file_path = r'C:\path\to\file.xmind'
    amm = AnalyseMindMap(file_path, "#")
    amm.analyse_xmind()

    file_path = r'C:\path\to\file.mm'
    amm = AnalyseMindMap(file_path, "#")
    amm.analyse_mm_xml()