"""Build a de-duplicated, sorted list of names found in an Excel sheet.

Reads the ``BnD``, ``global_coordinators`` and ``lead_managers`` columns of
the first sheet of the input workbook, splits the two multi-valued columns on
their separators, merges everything into one unique sorted list, and writes
that list to a single-column (``names``) workbook.
"""
import re

import pandas as pd

pd.set_option('display.max_columns', None)

# Legacy (disabled) code kept for reference: it compared ';'-separated DM
# values against ','-separated QB values and exported the mismatching rows.
# finallist=[]
# for i in range(0,len(ISIN)):
#     DMdata = DM[i]
#     QBdata = QB[i]
#     DMdata = DMdata.split(';')
#     QBdata = QBdata.split(',')
#
#     if sorted(DMdata)!=sorted(QBdata):
#         finallist.append([ISIN[i],DM[i],QB[i]])
#
#
# print(len(finallist))
# df = pd.DataFrame(finallist, columns=['ISIN', 'DM','QB'])
# df.to_excel(r'C:\Users\Admin\Desktop\final.xlsx',index=False)

# QBdata = pd.read_excel(r'C:\Users\Admin\Desktop\trans主承销商.xlsx',sheet_name='qb数据')

INPUT_PATH = r'C:\Users\Admin\Desktop\test.xlsx'
OUTPUT_PATH = r'C:\Users\Admin\Desktop\names.xlsx'

# Placeholder written into empty cells before the names are extracted.
MISSING = '--'

# Separators between names inside one cell; compiled once instead of per row.
# NOTE(review): the pattern is kept exactly as found. The repeated ','
# alternative is redundant and may originally have been a full-width comma
# -- confirm against real data before changing it.
NAME_SEPARATORS = re.compile(',|、|,')


def split_names(cell) -> list:
    """Return the individual names contained in one cell.

    The cell is converted to ``str`` first so that non-text cells (e.g.
    numbers read from Excel) do not make ``re.split`` raise ``TypeError``.
    Each fragment is stripped so ``"A, B"`` yields ``"B"`` rather than
    ``" B"``, which would otherwise be treated as a different name.
    """
    return [part.strip() for part in NAME_SEPARATORS.split(str(cell))]


def unique_names(cells, split=True) -> set:
    """Return the set of distinct names found in an iterable of cells.

    Args:
        cells: iterable of cell values from one column.
        split: when true, every cell is split on the name separators; when
            false, each cell is taken as a single name.
    """
    names = set()
    for cell in cells:
        if split:
            names.update(split_names(cell))
        else:
            names.add(str(cell).strip())
    return names


def merge_names(*groups) -> list:
    """Merge several name sets into one sorted list without placeholders.

    The empty string and the ``MISSING`` marker are dropped with a set
    difference, so (unlike ``list.remove``) nothing is raised when either
    placeholder happens to be absent from the data.
    """
    merged = set().union(*groups)
    merged -= {'', MISSING}
    return sorted(merged)


def main():
    """Read the input workbook, collect the unique names and export them."""
    data = pd.read_excel(INPUT_PATH, sheet_name=0)
    data = data.fillna(MISSING)
    data = data[['BnD', 'global_coordinators', 'lead_managers']]

    # BnD cells hold a single name each; the other two columns may hold
    # several names per cell.
    unique_bnd = unique_names(data['BnD'], split=False)
    unique_gcs = unique_names(data['global_coordinators'])
    unique_lms = unique_names(data['lead_managers'])

    # Diagnostics: per-column unique counts, then their sum (placeholders
    # included, duplicates across columns not yet merged).
    print(len(unique_gcs), len(unique_bnd), len(unique_lms))
    print(len(unique_lms) + len(unique_gcs) + len(unique_bnd))

    names = merge_names(unique_lms, unique_gcs, unique_bnd)
    df = pd.DataFrame(names, columns=['names'])
    df.to_excel(OUTPUT_PATH, index=False)


if __name__ == "__main__":
    main()