非专业人员可以学会的
程序语言
桌面数据处理和分析的
工具集
操作简单、函数丰富
批量和重复性任务很和繁琐
对复杂的运算支持不足
使用简单、界面流畅美观
计算功能单一,只能做死板的多维分析
完整的编程能力,天然内置于excel
对表格计算支持太差,简单任务也要大段代码
支持表格计算,交互性好,培训班遍地
表格计算不符合自然思维,太多种类表格需要掌握,难度超出非专业人员能力
df = pd.read_csv("../login_data.csv") df["ts"] = pd.to_datetime(df["ts"]).dt.date grouped = df.groupby("userid") aligned_dates = pd.date_range(start=df["ts"].min(), end=df["ts"].max(), freq='d') user_date_wether_con3days = [] for uid, group in grouped: group = group.drop_duplicates('ts') aligned_group = group.set_index("ts").reindex(aligned_dates) consecutive_logins = aligned_group.rolling(window=7) n = 0 date_wether_con3days = [] for r in consecutive_logins: n = 1 if n<7: continue else: ds = r['userid'].isna().cumsum() cont_login_times = r.groupby(ds).userid.count().max() wether_cont3days = 1 if cont_login_times>=3 else 0 date_wether_con3days.append(wether_cont3days) user_date_wether_con3days.append(date_wether_con3days) arr = np.array(user_date_wether_con3days) day7_cont3num = np.sum(arr,axis=0) result = pd.dataframe({'dt':aligned_dates[6:],'cont3_num':day7_cont3num})
真正的记录集合,有交互性
环境复杂,不能直接处理桌面文件,非专业人员难以使用
过程式运算复杂度太高,非专业人员难以掌握
with all_dates as ( select distinct trunc(ts) as login_date from login_data), user_login_counts as ( select userid, trunc(ts) as login_date, (case when count(*)>=1 then 1 else 0 end) as login_count from login_data group by userid, trunc(ts)), whether_login as ( select u.userid, ad.login_date, nvl(ulc.login_count, 0) as login_count from all_dates ad cross join ( select distinct userid from login_data) u left join user_login_counts ulc on u.userid = ulc.userid and ad.login_date = ulc.login_date order by u.userid, ad.login_date), whether_login_rn as ( select userid,login_date,login_count,rownum as rn from whether_login), whether_eq as( select userid,login_date,login_count,rn, (case when lag(login_count,1) over (order by rn)= login_count and login_count =1 and lag(userid,1) over (order by rn)=userid then 0 else 1 end) as wether_e from whether_login_rn ), numbered_sequence as ( select userid,login_date,login_count,rn, wether_e, sum(wether_e) over (order by rn) as lab from whether_eq), consecutive_logins_num as ( select userid,login_date,login_count,rn, wether_e,lab, (select (case when max(count(*))<3 then 0 else 1 end) from numbered_sequence b where b.rn between a.rn - 6 and a.rn and b.userid=a.userid group by b. lab) as cnt from numbered_sequence a) select login_date,sum(cnt) as cont3_num from consecutive_logins_num where login_date>=(select min(login_date) from all_dates) 6 group by login_date order by login_date;
a | |
1 | =file("login_data.csv").import@tc() |
2 | =periods(date(a1.ts),date(a1.m(-1).ts)) |
3 | =a1.group(userid).(~.align(a2,date(ts)).(if(#<7,null,(cnt=~[-6:0].group@i(!~).max(count(~)),if(cnt>=3,1,0))))) |
4 | =msum(a3).~.new(a2(#):dt,int(~):cont3_num).to(7,) |
1. 按车型、月份、年份分组并计数
2. 计算同期比:if(model==model[-1] && month==month[-1],salesvol/salesvol[-1],null)
3. 重新按车型、年、月排序
4. 计算比上期: if(model==model[-1] ,salesvol/salesvol[-1],null)
1. 读入3个表格
2. 用standard关联absent和performance
3. 按照公式计算工资
a | b | c | |
1 | =file("data.xlsx").xlsimport@t() | ||
2 | for a1.fname() | =a1.field(a2) | |
3 | =b2.sum() | =b2.sum(int(~)) | |
4 | =if(b3==c3,b2.mode(),b2.avg()) | ||
5 | =b2.(if(~,~,b4)) | >a1.field(a2,b5) | |
6 | >file("datanew.xlsx").xlsexport@t(a1) |
a | b | c | |
1 | [id,name,sex,postion,birthday,phone,address,postcode] | ||
2 | [c1,c2,f2,c3,c4,d5,c7,c8] | ||
3 | =directory@p("data/*.xlsx") | ||
4 | for a3 | =file(a4).xlsopen() | =b2.(b4.xlscell(~)) |
5 | =@|c4 | ||
6 | =create(${a1.concat@c()}).record(b5) | ||
7 | >file("all.xlsx").xlsexport@t(a6) |