-- Look up the id and name of the row in t whose id equals 1.
select
    id,
    name
from t
where id = 1
-- Total amount per area: one output row for each distinct area in t.
select
    area,
    sum(amount)
from t
group by area
-- For each stock code, find the length of its longest run of rows whose
-- closing value (cl) keeps rising, with rows ordered by dt.
-- Named CTEs replace the original unaliased nested derived tables, which
-- several engines reject.
with price_moves as (
    -- updown_flag is 0 when cl is greater than the previous row's cl for the
    -- same code, otherwise 1. The first row of each code has no previous row,
    -- so the comparison is not true and the flag is 1.
    select
        code,
        dt,
        case
            when cl > lag(cl) over (partition by code order by dt) then 0
            else 1
        end as updown_flag
    from stock
),
run_labels as (
    -- Running total of the flags per code. The total only advances on a
    -- non-rising row, so every row of one uninterrupted rising run shares the
    -- same no_up_days value. The explicit rows frame makes the total advance
    -- row by row even if two rows of one code share the same dt.
    select
        code,
        dt,
        sum(updown_flag) over (
            partition by code
            order by dt
            rows between unbounded preceding and current row
        ) as no_up_days
    from price_moves
),
run_lengths as (
    -- One row per run. count(*) counts every row in the run, including the
    -- non-rising row that opens it.
    select
        code,
        count(*) as con_rise
    from run_labels
    group by
        code,
        no_up_days
)
select
    code,
    max(con_rise) as longest_up_days
from run_lengths
group by code
SQL 没有设置断点、单步执行这些很常见的调试方法,嵌套多层时就要逐层拆分执行。
SPL 显得更简单,不再需要循环语句。
-- For each stock code, find the length of its longest run of rows whose
-- closing value (cl) keeps rising, with rows ordered by dt.
-- Named CTEs replace the original unaliased nested derived tables, which
-- several engines reject.
with price_moves as (
    -- updown_flag is 0 when cl is greater than the previous row's cl for the
    -- same code, otherwise 1. The first row of each code has no previous row,
    -- so the comparison is not true and the flag is 1.
    select
        code,
        dt,
        case
            when cl > lag(cl) over (partition by code order by dt) then 0
            else 1
        end as updown_flag
    from stock
),
run_labels as (
    -- Running total of the flags per code. The total only advances on a
    -- non-rising row, so every row of one uninterrupted rising run shares the
    -- same no_up_days value. The explicit rows frame makes the total advance
    -- row by row even if two rows of one code share the same dt.
    select
        code,
        dt,
        sum(updown_flag) over (
            partition by code
            order by dt
            rows between unbounded preceding and current row
        ) as no_up_days
    from price_moves
),
run_lengths as (
    -- One row per run. count(*) counts every row in the run, including the
    -- non-rising row that opens it.
    select
        code,
        count(*) as con_rise
    from run_labels
    group by
        code,
        no_up_days
)
select
    code,
    max(con_rise) as longest_up_days
from run_lengths
group by code
A | |
1 | =stock.sort(stockrecords.txt) |
2 | =T(A1).sort(dt) |
3 | =A2.group(code;~.group@i(cl<cl[-1]).max(~.len()):max_increase_days) |
# Compute, for every stock code, the longest run of consecutive rises in the
# 'cl' column and collect the results in max_rise_df.
import pandas as pd

stock_file = "stockrecords.txt"
# The input is read as tab-separated text.
stock_info = pd.read_csv(stock_file, sep="\t")
# Order rows by code and then by dt so diff() compares consecutive rows of a code.
stock_info.sort_values(by=['code', 'dt'], inplace=True)
stock_group = stock_info.groupby(by='code')
# A row opens a new run when cl did not rise versus the previous row of the
# same code; the first row of each code has a NaN diff, which fillna(0) turns
# into "did not rise". The cumulative sum of those markers gives every run its
# own label, and labels never repeat across codes.
stock_info['label'] = (
    stock_info.groupby('code')['cl']
    .diff()
    .fillna(0)
    .le(0)
    .astype(int)
    .cumsum()
)
max_increase_days = {}
for code, group in stock_info.groupby('code'):
    # Each run contains its opening non-rising row plus the rising rows that
    # follow it, so subtracting 1 leaves only the number of rising rows.
    max_increase_days[code] = group.groupby('label').size().max() - 1
max_rise_df = pd.DataFrame(
    list(max_increase_days.items()),
    columns=['code', 'max_increase_days'],
)
A | |
1 | smalldata.txt |
2 | =file(A1).import@t() |
3 | =A2.groups(state;sum(amount):amount) |
A | |
1 | bigdata.txt |
2 | =file(A1).cursor@t() |
3 | =A2.groups(state;sum(amount):amount) |
A | |
1 | bigdata.txt |
2 | =file(A1).cursor@t() |
3 | =A2.groups(state;sum(amount):amount) |
A | |
1 | bigdata.txt |
2 | =file(A1).cursor@tm() |
3 | =A2.groups(state;sum(amount):amount) |
普通笔记本即可流畅运行,无须服务器集群
高压缩率文件存储数据,易于携带
# Compute, for every stock code, the longest run of consecutive rises in the
# 'cl' column and collect the results in max_rise_df.
import pandas as pd

stock_file = "stockrecords.txt"
# The input is read as tab-separated text.
stock_info = pd.read_csv(stock_file, sep="\t")
# Order rows by code and then by dt so diff() compares consecutive rows of a code.
stock_info.sort_values(by=['code', 'dt'], inplace=True)
stock_group = stock_info.groupby(by='code')
# A row opens a new run when cl did not rise versus the previous row of the
# same code; the first row of each code has a NaN diff, which fillna(0) turns
# into "did not rise". The cumulative sum of those markers gives every run its
# own label, and labels never repeat across codes.
stock_info['label'] = (
    stock_info.groupby('code')['cl']
    .diff()
    .fillna(0)
    .le(0)
    .astype(int)
    .cumsum()
)
max_increase_days = {}
for code, group in stock_info.groupby('code'):
    # Each run contains its opening non-rising row plus the rising rows that
    # follow it, so subtracting 1 leaves only the number of rising rows.
    max_increase_days[code] = group.groupby('label').size().max() - 1
max_rise_df = pd.DataFrame(
    list(max_increase_days.items()),
    columns=['code', 'max_increase_days'],
)