20,848
社区成员




FROM (
select expid(curl,'pid2.properties') as epid,--抽取产品id
expid(exagent(agt)['name'],'browse.properties') as browser,--根据agent头抽取浏览器
expid(exagent(agt)['category'],'category.properties') as category,--根据agent头抽取平台
expid(exagent(agt)['os'],'os.properties') as os,--根据agent头抽取操作系统
uid,
cid
from file_pv_track where pdate = '$date'
) a
INSERT OVERWRITE DIRECTORY '$output1' $sql1
INSERT OVERWRITE DIRECTORY '$output2' $sql2
INSERT OVERWRITE DIRECTORY '$output3' $sql3
sql1="select '$date',
a.epid,
a.os,
count(1) as pv,
count(distinct a.cid) as uv,
count(distinct a.uid) as l_uv
group by a.epid,a.os";
sql2="select '$date',
a.epid,
a.category,
count(1) as pv,
count(distinct a.cid) as uv,
count(distinct a.uid) as l_uv
group by a.epid,a.category";
sql3="select '$date',
a.epid,
a.browser,
count(1) as pv,
count(distinct a.cid) as uv,
count(distinct a.uid) as l_uv
group by a.epid,a.browser";
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME file_pv_track))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION expid (TOK_TABLE_OR_COL curl) 'pid2.properties') epid) (TOK_SELEXPR (TOK_FUNCTION expid ([ (TOK_FUNCTION exagent (TOK_TABLE_OR_COL agt)) 'name') 'browse.properties') browser) (TOK_SELEXPR (TOK_FUNCTION expid ([ (TOK_FUNCTION exagent (TOK_TABLE_OR_COL agt)) 'category') 'category.properties') category) (TOK_SELEXPR (TOK_FUNCTION expid ([ (TOK_FUNCTION exagent (TOK_TABLE_OR_COL agt)) 'os') 'os.properties') os) (TOK_SELEXPR (TOK_TABLE_OR_COL uid)) (TOK_SELEXPR (TOK_TABLE_OR_COL cid))) (TOK_WHERE (= (TOK_TABLE_OR_COL pdate) '2014-07-14')))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR '/output/online/fact_agent/2014-07-14/1405393817/fact_agent_os/')) (TOK_SELECT (TOK_SELEXPR '2014-07-14') (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) epid)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) os)) (TOK_SELEXPR (TOK_FUNCTION count 1) pv) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL cid)) uv) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL uid)) l_uv)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) epid) (. (TOK_TABLE_OR_COL a) os))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR '/output/online/fact_agent/2014-07-14/1405393817/fact_agent_platform/')) (TOK_SELECT (TOK_SELEXPR '2014-07-14') (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) epid)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) category)) (TOK_SELEXPR (TOK_FUNCTION count 1) pv) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL cid)) uv) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL uid)) l_uv)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) epid) (. (TOK_TABLE_OR_COL a) category))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR '/output/online/fact_agent/2014-07-14/1405393817/fact_agent_browser/')) (TOK_SELECT (TOK_SELEXPR '2014-07-14') (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) epid)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) browser)) (TOK_SELEXPR (TOK_FUNCTION count 1) pv) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL cid)) uv) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL uid)) l_uv)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) epid) (. (TOK_TABLE_OR_COL a) browser))))
CONSOLE#
STAGE DEPENDENCIES:
Stage-3 is a root stage
Stage-4 depends on stages: Stage-3
Stage-0 depends on stages: Stage-4
Stage-5 depends on stages: Stage-3
Stage-1 depends on stages: Stage-5
Stage-6 depends on stages: Stage-3
Stage-2 depends on stages: Stage-6
CONSOLE#
STAGE PLANS:
Stage: Stage-3
Map Reduce
Alias -> Map Operator Tree:
a:file_pv_track
TableScan
alias: file_pv_track
Select Operator
expressions:
expr: expid(curl, 'pid2.properties')
type: string
expr: expid(exagent(agt)['name'], 'browse.properties')
type: string
expr: expid(exagent(agt)['category'], 'category.properties')
type: string
expr: expid(exagent(agt)['os'], 'os.properties')
type: string
expr: uid
type: string
expr: cid
type: string
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Reduce Output Operator
key expressions:
expr: _col5
type: string
expr: _col4
type: string
sort order: ++
Map-reduce partition columns:
expr: _col5
type: string
expr: _col4
type: string
tag: -1
value expressions:
expr: _col0
type: string
expr: _col3
type: string
expr: 1
type: int
expr: _col2
type: string
expr: _col1
type: string
Reduce Operator Tree:
Forward
Group By Operator
aggregations:
expr: count(VALUE._col2)
expr: count(DISTINCT KEY._col0)
expr: count(DISTINCT KEY._col1)
bucketGroup: false
keys:
expr: VALUE._col0
type: string
expr: VALUE._col1
type: string
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Group By Operator
aggregations:
expr: count(VALUE._col2)
expr: count(DISTINCT KEY._col0)
expr: count(DISTINCT KEY._col1)
bucketGroup: false
keys:
expr: VALUE._col0
type: string
expr: VALUE._col3
type: string
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Group By Operator
aggregations:
expr: count(VALUE._col2)
expr: count(DISTINCT KEY._col0)
expr: count(DISTINCT KEY._col1)
bucketGroup: false
keys:
expr: VALUE._col0
type: string
expr: VALUE._col4
type: string
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
File Output Operator
compressed: false
GlobalTableId: 0
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
CONSOLE#
Stage: Stage-4
Map Reduce
Alias -> Map Operator Tree:
hdfs://csdn/tmp/hive-hadoop/hive_2014-07-15_11-10-24_836_3002486700983717417-1/-mr-10003
Reduce Output Operator
key expressions:
expr: _col0
type: string
expr: _col1
type: string
sort order: ++
Map-reduce partition columns:
expr: _col0
type: string
expr: _col1
type: string
tag: -1
value expressions:
expr: _col2
type: bigint
expr: _col3
type: bigint
expr: _col4
type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
expr: count(VALUE._col0)
expr: count(VALUE._col1)
expr: count(VALUE._col2)
bucketGroup: false
keys:
expr: KEY._col0
type: string
expr: KEY._col1
type: string
mode: final
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Select Operator
expressions:
expr: '2014-07-14'
type: string
expr: _col0
type: string
expr: _col1
type: string
expr: _col2
type: bigint
expr: _col3
type: bigint
expr: _col4
type: bigint
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
File Output Operator
compressed: false
GlobalTableId: 1
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
CONSOLE#
Stage: Stage-0
Move Operator
files:
hdfs directory: true
destination: /output/online/fact_agent/2014-07-14/1405393817/fact_agent_os
CONSOLE#
Stage: Stage-5
Map Reduce
Alias -> Map Operator Tree:
hdfs://csdn/tmp/hive-hadoop/hive_2014-07-15_11-10-24_836_3002486700983717417-1/-mr-10004
Reduce Output Operator
key expressions:
expr: _col0
type: string
expr: _col1
type: string
sort order: ++
Map-reduce partition columns:
expr: _col0
type: string
expr: _col1
type: string
tag: -1
value expressions:
expr: _col2
type: bigint
expr: _col3
type: bigint
expr: _col4
type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
expr: count(VALUE._col0)
expr: count(VALUE._col1)
expr: count(VALUE._col2)
bucketGroup: false
keys:
expr: KEY._col0
type: string
expr: KEY._col1
type: string
mode: final
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Select Operator
expressions:
expr: '2014-07-14'
type: string
expr: _col0
type: string
expr: _col1
type: string
expr: _col2
type: bigint
expr: _col3
type: bigint
expr: _col4
type: bigint
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
File Output Operator
compressed: false
GlobalTableId: 2
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
CONSOLE#
Stage: Stage-1
Move Operator
files:
hdfs directory: true
destination: /output/online/fact_agent/2014-07-14/1405393817/fact_agent_platform
..........
select '2014-07-07',
a.epid,
a.category,
count(1) as pv,
count(distinct cid) as uv,
count(distinct uid) as l_uv
FROM (
select expid(curl,'pid2.properties') as epid,
expid(exagent(agt)['name'],'browse.properties') as browser,
expid(exagent(agt)['category'],'category.properties') as category,
expid(exagent(agt)['os'],'os.properties') as os,
uid,
cid
from file_pv_track where pdate = '2014-07-07'
) a
group by a.epid,a.category
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME file_pv_track))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION expid (TOK_TABLE_OR_COL curl) 'pid2.properties') epid) (TOK_SELEXPR (TOK_FUNCTION expid ([ (TOK_FUNCTION exagent (TOK_TABLE_OR_COL agt)) 'name') 'browse.properties') browser) (TOK_SELEXPR (TOK_FUNCTION expid ([ (TOK_FUNCTION exagent (TOK_TABLE_OR_COL agt)) 'category') 'category.properties') category) (TOK_SELEXPR (TOK_FUNCTION expid ([ (TOK_FUNCTION exagent (TOK_TABLE_OR_COL agt)) 'os') 'os.properties') os) (TOK_SELEXPR (TOK_TABLE_OR_COL uid)) (TOK_SELEXPR (TOK_TABLE_OR_COL cid))) (TOK_WHERE (= (TOK_TABLE_OR_COL pdate) '2014-07-07')))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR '/output/debug/t')) (TOK_SELECT (TOK_SELEXPR '2014-07-07') (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) epid)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) category)) (TOK_SELEXPR (TOK_FUNCTION count 1) pv) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL cid)) uv) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL uid)) l_uv)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) epid) (. (TOK_TABLE_OR_COL a) category))))
CONSOLE#
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
CONSOLE#
STAGE PLANS:
Stage: Stage-1
Map Reduce
Alias -> Map Operator Tree:
a:file_pv_track
TableScan
alias: file_pv_track
Select Operator
expressions:
expr: expid(curl, 'pid2.properties')
type: string
expr: expid(exagent(agt)['category'], 'category.properties')
type: string
expr: uid
type: string
expr: cid
type: string
outputColumnNames: _col0, _col2, _col4, _col5
Select Operator
expressions:
expr: _col0
type: string
expr: _col2
type: string
expr: _col5
type: string
expr: _col4
type: string
outputColumnNames: _col0, _col2, _col5, _col4
Group By Operator
aggregations:
expr: count(1)
expr: count(DISTINCT _col5)
expr: count(DISTINCT _col4)
bucketGroup: false
keys:
expr: _col0
type: string
expr: _col2
type: string
expr: _col5
type: string
expr: _col4
type: string
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Reduce Output Operator
key expressions:
expr: _col0
type: string
expr: _col1
type: string
expr: _col2
type: string
expr: _col3
type: string
sort order: ++++
Map-reduce partition columns:
expr: _col0
type: string
expr: _col1
type: string
tag: -1
value expressions:
expr: _col4
type: bigint
expr: _col5
type: bigint
expr: _col6
type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
expr: count(VALUE._col0)
expr: count(DISTINCT KEY._col2:0._col0)
expr: count(DISTINCT KEY._col2:1._col0)
bucketGroup: false
keys:
expr: KEY._col0
type: string
expr: KEY._col1
type: string
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Select Operator
expressions:
expr: '2014-07-07'
type: string
expr: _col0
type: string
expr: _col1
type: string
expr: _col2
type: bigint
expr: _col3
type: bigint
expr: _col4
type: bigint
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
File Output Operator
compressed: false
GlobalTableId: 1
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
CONSOLE#
Stage: Stage-0
Move Operator
files:
hdfs directory: true
destination: /output/debug/t