观点提取旨在从非结构化的评论文本中提取标准化、结构化的信息,如产品名、评论维度、评论观点等。此处希望大家能够通过自然语言处理的语义情感分析技术判断出一段银行产品评论文本的情感倾向,并能进一步通过语义分析和实体识别,标识出评论所讨论的产品名、评价指标和评价关键词。
import pandas as pd
from ast import literal_eval
# --- Load the raw CSV files -------------------------------------------------
# The training frame drops its 'Unnamed: 0' column right after loading
# (presumably a saved index column -- confirm against the CSV).
train_data = pd.read_csv('./train_data_public.csv')
train_data = train_data.drop(columns='Unnamed: 0')
test_data = pd.read_csv('./test_public.csv')

# --- Tokenise ---------------------------------------------------------------
# 'BIO_anno' holds one string per row; split it on single spaces so each cell
# becomes a list of tag strings.
train_data['BIO_anno'] = train_data['BIO_anno'].map(
    lambda annotation: annotation.split(' ')
)

# Each training sample is a (characters, tags) tuple: the text broken into
# individual characters, paired with that row's tag list.
# NOTE(review): presumably one tag per character -- nothing here enforces it.
train_chars = train_data['text'].map(list)
train_data['training_data'] = pd.Series(
    list(zip(train_chars, train_data['BIO_anno'])),
    index=train_data.index,
)

# Test rows carry no tags, so a sample is just the list of characters.
test_data['testing_data'] = test_data['text'].map(list)

# Bare expression, presumably left over from a notebook cell (shows the frame
# there); it has no effect when the file runs as a plain script.
train_data
# Plain-Python containers for the prepared samples.
#
# training_data_txt: every cell of train_data['training_data'], in row order.
# The column is read directly instead of looping over positions with
# train_data.iloc[i][...], which built a whole row Series per iteration just
# to fetch one cell (and the loop body had lost its indentation).
training_data_txt = train_data['training_data'].tolist()

# testing_data_txt: left empty at this point; presumably populated further
# down the file -- confirm.
testing_data_txt = []