由于工作需要写一个跟踪产品调价前后销量变化的程序。
在写的过程中有用到时间戳加减法,故以此记录。
下面是主代码:
# -*- coding:UTF-8 -*-
import pandas as pd
import os
import datetime
df_id = pd.read_excel('./跟踪商品.xlsx')
file_lst = os.listdir('./database')
def concat_file(file_lst):
"""
::合并数据源::
"""
lst = []
data_lst = []
for i in file_lst:
filename = './database' + '/' + i
lst.append(filename) # 获取文件名列表
for x in lst:
df = pd.read_excel(x)
data_lst.append(df) # 获取所有文件的Dataframe对象
#df1 = pd.read_excel(file1)
#df2 = pd.read_excel(file2)
df3 = pd.concat(data_lst, axis=0)
return df3
def create_dict(data):
"""
:将编码与调价日期构造为字典:
"""
lst1 = data['产品编码'].tolist()
lst2 = data['调价时间'].tolist()
dic1 = dict(zip(lst1, lst2))
return dic1
def get_before_date(date):
"""
:获取目标时间的前一周日期:
"""
delta7 = datetime.timedelta(days=7)
if type(date) == str:
start_time = datetime.datetime.strftime(date, '%Y/%m/%d')
end_time = start_time - delta7
else:
end_time = date - delta7
return end_time
def get_before_data(data, id_, dict_id):
'''
:以调价日期为分界线,获取调价前7天销量:
'''
sub_time = dict_id[id_]
end_time = get_before_date(sub_time)
df = data[(data['日期']>end_time) & (data['日期']<=sub_time)]
df1 = df.groupby(['商品代码']).sum()
try:
df1 = df1.loc[id_]
sub = df1['销售数量']
#print('商品代码',df1)
except KeyError:
sub = 0.1
return sub
def get_after_date(date):
"""
:获取目标时间的后一周日期:
"""
delta7 = datetime.timedelta(days=7)
#print(date)
if type(date) == str:
start_time = datetime.datetime.strftime(date, '%Y/%m/%d')
end_time = start_time + delta7
else:
end_time = date + delta7
return end_time
def get_after(data, id_, dict_id):
'''
:以调价日期为分界线,获取调价后的销售数据,以周为单位:
'''
sub_time = dict_id[id_]
end_time = get_after_date(sub_time)
date_lst = data['日期'].tolist()
sub_lst = []
while end_time in date_lst:
try:
df = data[(data['日期']>sub_time) & (data['日期']<=end_time)]
df1 = df.groupby(['商品代码']).sum()
df1 = df1.loc[id_]
sub = df1['销售数量']
except KeyError:
sub = 0.1
sub_lst.append(sub)
sub_time = end_time # 起始日期等于上一周的终止日期
end_time = get_after_date(end_time)
return sub_lst
def input_excel(sub, week_sub, id, dict):
'''
:将数据传入表格中:
'''
week_counts = len(week_sub) # 获得该ID调价后的周数
res = []
result = []
result.append(id) # 传入产品id构建数组
result.append(dict[id])
result.append(sub) # 传入产品调价前一周销量
for i in week_sub:
result.append(i) # 传入调价后每周的销量
data = pd.DataFrame()
res.append(result)
data = data.append(res, ignore_index=True) # 构建Dataframe对象
columns = ['产品编码', '调价时间','调价前一周销量']
n = 1
while n <= week_counts:
columns.append(f'第{n}周')
n += 1
# print('columns:', columns)
data.columns = columns
# print(data)
return data
def merge_excel(data, df):
data = data.merge(df, how='outer')
print(data)
return data
if __name__ == "__main__":
data = concat_file(file_lst)
id_time = create_dict(df_id)
lst = df_id['产品编码'].tolist()
new_data = pd.DataFrame()
new_df = df_id
df_lst = pd.DataFrame()
for i in lst:
sub = get_before_data(data, i, id_time)
week_sub = get_after(data, i, id_time)
df = input_excel(sub, week_sub, i, id_time)
#new_df.drop_duplicates(subset=['产品编码','调价时间'],keep='last',inplace=True)
df_lst = df_lst.append(df)
dff = merge_excel(new_df, df_lst)
dff.to_excel('./调价跟踪.xlsx')
其中:
delta7 = datetime.timedelta(days=7)
此段代码则是将7天的时间长度存入变量,以便下文操作,当然,你可以是1、是3、是5、是30都行,此处为7主要是需求要求周期为一周。
while end_time in date_lst:
try:
df = data[(data['日期']>sub_time) & (data['日期']<=end_time)]
df1 = df.groupby(['商品代码']).sum()
df1 = df1.loc[id_]
sub = df1['销售数量']
except KeyError:
sub = 0.1
sub_lst.append(sub)
sub_time = end_time # 起始日期等于上一周的终止日期
end_time = get_after_date(end_time)
由于需求是长期跟踪,没有明确结束时间,此处用while循环来判断一个产品在调价后一周期后的时间是否存在于数据源中来确定计算到第几周。