2016년 8월부터 2017년 7월까지의 Google merchandise store 데이터를 사용하였습니다.
구글 빅쿼리를 이용하여 전처리를 진행하였습니다.
유입 채널 데이터 추출
select date,
fullVisitorId,
visitId,
trafficSource.campaign,
trafficSource.source,
trafficSource.medium,
channelGrouping,
h.transaction.transactionRevenue
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`,
UNNEST(hits) as h
where parse_date('%Y%m%d', _TABLE_SUFFIX)
BETWEEN DATE('2016-08-01') AND DATE('2017-07-31') ;
구매 퍼널 데이터 추출
select date,
fullVisitorId,
visitId,
case h.eCommerceAction.action_type
when '2' then 'product view'
when '3' then 'add to cart'
when '5' then 'purchase started'
when '6' then 'purchase completed'
end as action,
h.transaction.transactionRevenue
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_*`,
UNNEST(hits) as h
where parse_date('%Y%m%d', _TABLE_SUFFIX)
BETWEEN DATE('2016-08-01') AND DATE('2017-07-31')
and h.eCommerceAction.action_type in ('2', '3', '5', '6') ;