This SQL query combines and processes data from multiple Google Play app statistics tables (installs, ratings, and crashes) to create a comprehensive app version report. It performs several operations including joining the tables, calculating rolling metrics, filling in missing values, and creating derived metrics such as net device installs. The query handles different granularities of data and ensures consistency across various app versions and dates.
IntegrationFeaturizationCleaningAggregationOtherWITH installs AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_installs_app_version
), ratings AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_ratings_app_version
), crashes AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_crashes_app_version
), install_metrics AS (
SELECT
*,
SUM(device_installs) OVER (PARTITION BY app_version_code, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs,
SUM(device_uninstalls) OVER (PARTITION BY app_version_code, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls
FROM installs
), app_version_join AS (
SELECT
COALESCE(install_metrics.date_day, ratings.date_day, crashes.date_day) AS date_day, /* these 3 columns are the grain of this model */
COALESCE(install_metrics.app_version_code, ratings.app_version_code, crashes.app_version_code) AS app_version_code,
COALESCE(install_metrics.package_name, ratings.package_name, crashes.package_name) AS package_name,
COALESCE(install_metrics.active_devices_last_30_days, 0) AS active_devices_last_30_days, /* metrics based on unique devices + users */
COALESCE(install_metrics.device_installs, 0) AS device_installs,
COALESCE(install_metrics.device_uninstalls, 0) AS device_uninstalls,
COALESCE(install_metrics.device_upgrades, 0) AS device_upgrades,
COALESCE(install_metrics.user_installs, 0) AS user_installs,
COALESCE(install_metrics.user_uninstalls, 0) AS user_uninstalls,
COALESCE(crashes.crashes, 0) AS crashes, /* metrics based on events. a user or device can have multiple events in one day */
COALESCE(crashes.anrs, 0) AS anrs,
COALESCE(install_metrics.install_events, 0) AS install_events,
COALESCE(install_metrics.uninstall_events, 0) AS uninstall_events,
COALESCE(install_metrics.update_events, 0) AS update_events,
install_metrics.total_device_installs,
install_metrics.total_device_uninstalls,
ratings.average_rating, /* this one actually isn't rolling but we won't coalesce days with no reviews to 0 rating */
ratings.rolling_total_average_rating
FROM install_metrics
FULL OUTER JOIN ratings
ON install_metrics.date_day = ratings.date_day
AND install_metrics.package_name = ratings.package_name
AND /* choosing an arbitrary negative integer as we can't coalesce with a string like 'null_version_code'. null app version codes will cause fanout */ COALESCE(install_metrics.app_version_code, -5) = COALESCE(ratings.app_version_code, -5) /* this really doesn't happen IRL but let's be safe */
FULL OUTER JOIN crashes
ON COALESCE(install_metrics.date_day, ratings.date_day) = crashes.date_day
AND COALESCE(install_metrics.package_name, ratings.package_name) = crashes.package_name
AND COALESCE(install_metrics.app_version_code, ratings.app_version_code, -5) = COALESCE(crashes.app_version_code, -5)
), create_partitions /* to backfill in days with NULL values for rolling metrics, we'll create partitions to batch them together with records that have non-null values */ /* we can't just use last_value(ignore nulls) because of postgres :/ */ AS (
SELECT
*,
SUM(CASE WHEN rolling_total_average_rating IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY app_version_code, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS rolling_total_average_rating_partition,
SUM(CASE WHEN total_device_installs IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY app_version_code, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs_partition,
SUM(CASE WHEN total_device_uninstalls IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY app_version_code, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls_partition
FROM app_version_join
), fill_values /* now we'll take the non-null value for each partitioned batch and propagate it across the rows included in the batch */ AS (
SELECT
date_day,
app_version_code,
package_name,
active_devices_last_30_days,
device_installs,
device_uninstalls,
device_upgrades,
user_installs,
user_uninstalls,
crashes,
anrs,
install_events,
uninstall_events,
update_events,
average_rating,
FIRST_VALUE(rolling_total_average_rating) OVER (PARTITION BY rolling_total_average_rating_partition, app_version_code, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS rolling_total_average_rating,
FIRST_VALUE(total_device_installs) OVER (PARTITION BY total_device_installs_partition, app_version_code, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs,
FIRST_VALUE(total_device_uninstalls) OVER (PARTITION BY total_device_uninstalls_partition, app_version_code, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls
FROM create_partitions
), final AS (
SELECT
date_day,
app_version_code,
package_name,
device_installs,
device_uninstalls,
device_upgrades,
user_installs,
user_uninstalls,
crashes,
anrs,
install_events,
uninstall_events,
update_events,
active_devices_last_30_days,
average_rating,
rolling_total_average_rating, /* leave null if there are no ratings yet */
COALESCE(total_device_installs, 0) AS total_device_installs, /* the first day will have NULL values, let's make it 0 */
COALESCE(total_device_uninstalls, 0) AS total_device_uninstalls,
COALESCE(total_device_installs, 0) /* calculate difference rolling metric */ - COALESCE(total_device_uninstalls, 0) AS net_device_installs
FROM fill_values
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
active_devices_last_30_days | INT64 | The number of active devices on which your app is installed. An active device is one that has been turned on at least once in the past 30 days. |
app_version_code | INT64 | Integer value of the version of the app being reported on. |
device_installs | INT64 | Devices on which users installed your app on this day. An individual user can have multiple device installs. |
device_uninstalls | INT64 | The number of devices from which users uninstalled your app on a given day. |
device_upgrades | INT64 | The number of devices from which users upgraded your app on a given day. |
user_installs | INT64 | The number of users who installed your app and did not have it installed on any other devices at the time on a given day. |
user_uninstalls | INT64 | The number of users who uninstalled your app from all of their devices on this day. |
date_day | DATE | The date on which the data is reported. |
install_events | INT64 | The number of times your app was installed, including devices on which the app had been installed previously. This does not include pre-installs or device reactivations. |
package_name | STRING | The package name of the app owning the report data. |
uninstall_events | INT64 | The number of times your app was uninstalled. This does not include inactive devices. |
update_events | INT64 | The number of times your app was updated. |
anrs | INT64 | Daily application not responding (ANR) reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
crashes | INT64 | Daily crash reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
rolling_total_average_rating | FLOAT64 | Average star rating this app received across all ratings submitted up to and including the past day. For each user submitting a rating, only their most recent rating of the app is counted. |
average_rating | FLOAT64 | Average star rating this app has received across all ratings submitted on a given day. |
total_device_installs | INT64 | Cumulative number of device installs up to and including this day (aggregated on the dimension of this table). This does not take uninstalls into account and may contain duplicate devices. |
total_device_uninstalls | INT64 | Cumulative number of device uninstalls up to and including this day (aggregated on the dimension of this table). This does not take re-installs into account and may contain duplicate devices. |
net_device_installs | INT64 | Cumulative net number of device installs up to and including this day (aggregated on the dimension of this table). This is the difference of `total_device_installs` and `total_device_uninstalls`. |
This SQL query combines and processes data from multiple Google Play Store sources to create a comprehensive country-level report. It integrates install metrics, ratings, and store performance data, calculates rolling totals and averages, fills in missing values, and adds geographical information. The query also computes various metrics such as total installs, uninstalls, store conversions, and net device installs.
IntegrationCleaningFeaturizationAggregationOtherWITH installs AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_installs_country
), ratings AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_ratings_country
), store_performance AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__store_performance_country
), country_codes AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.google_play__country_codes
), install_metrics AS (
SELECT
*,
SUM(device_installs) OVER (PARTITION BY country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs,
SUM(device_uninstalls) OVER (PARTITION BY country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls
FROM installs
), store_performance_metrics AS (
SELECT
*,
SUM(store_listing_acquisitions) OVER (PARTITION BY country_region, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_acquisitions,
SUM(store_listing_visitors) OVER (PARTITION BY country_region, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_visitors
FROM store_performance
), country_join AS (
SELECT
COALESCE(install_metrics.date_day, ratings.date_day, store_performance_metrics.date_day) AS date_day, /* these 3 columns are the grain of this model */
COALESCE(install_metrics.country, ratings.country, store_performance_metrics.country_region) AS country,
COALESCE(
install_metrics.package_name,
ratings.package_name,
store_performance_metrics.package_name
) AS package_name,
COALESCE(install_metrics.active_devices_last_30_days, 0) AS active_devices_last_30_days, /* metrics based on unique devices + users */
COALESCE(install_metrics.device_installs, 0) AS device_installs,
COALESCE(install_metrics.device_uninstalls, 0) AS device_uninstalls,
COALESCE(install_metrics.device_upgrades, 0) AS device_upgrades,
COALESCE(install_metrics.user_installs, 0) AS user_installs,
COALESCE(install_metrics.user_uninstalls, 0) AS user_uninstalls,
COALESCE(store_performance_metrics.store_listing_acquisitions, 0) AS store_listing_acquisitions,
COALESCE(store_performance_metrics.store_listing_visitors, 0) AS store_listing_visitors,
store_performance_metrics.store_listing_conversion_rate, /* not coalescing if there aren't any visitors */
COALESCE(install_metrics.install_events, 0) AS install_events, /* metrics based on events. a user or device can have multiple installs in one day */
COALESCE(install_metrics.uninstall_events, 0) AS uninstall_events,
COALESCE(install_metrics.update_events, 0) AS update_events,
install_metrics.total_device_installs,
install_metrics.total_device_uninstalls,
ratings.average_rating, /* this one actually isn't rolling but we won't coalesce days with no reviews to 0 rating */
ratings.rolling_total_average_rating,
store_performance_metrics.total_store_acquisitions,
store_performance_metrics.total_store_visitors
FROM install_metrics
FULL OUTER JOIN ratings
ON install_metrics.date_day = ratings.date_day
AND install_metrics.package_name = ratings.package_name
AND /* coalesce null countries otherwise they'll cause fanout with the full outer join */ COALESCE(install_metrics.country, 'null_country') = COALESCE(ratings.country, 'null_country') /* in the source package we aggregate all null country records together into one batch per day */
FULL OUTER JOIN store_performance_metrics
ON store_performance_metrics.date_day = COALESCE(install_metrics.date_day, ratings.date_day)
AND store_performance_metrics.package_name = COALESCE(install_metrics.package_name, ratings.package_name)
AND COALESCE(store_performance_metrics.country_region, 'null_country') = COALESCE(install_metrics.country, ratings.country, 'null_country')
), create_partitions /* to backfill in days with NULL values for rolling metrics, we'll create partitions to batch them together with records that have non-null values */ /* we can't just use last_value(ignore nulls) because of postgres :/ */ AS (
SELECT
*,
SUM(CASE WHEN rolling_total_average_rating IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS rolling_total_average_rating_partition,
SUM(CASE WHEN total_device_installs IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs_partition,
SUM(CASE WHEN total_device_uninstalls IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls_partition,
SUM(CASE WHEN total_store_acquisitions IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_acquisitions_partition,
SUM(CASE WHEN total_store_visitors IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_visitors_partition
FROM country_join
), fill_values /* now we'll take the non-null value for each partitioned batch and propagate it across the rows included in the batch */ AS (
SELECT
date_day,
country,
package_name,
active_devices_last_30_days,
device_installs,
device_uninstalls,
device_upgrades,
user_installs,
user_uninstalls,
install_events,
uninstall_events,
update_events,
store_listing_acquisitions,
store_listing_visitors,
store_listing_conversion_rate,
average_rating,
FIRST_VALUE(rolling_total_average_rating) OVER (PARTITION BY rolling_total_average_rating_partition, country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS rolling_total_average_rating,
FIRST_VALUE(total_device_installs) OVER (PARTITION BY total_device_installs_partition, country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs,
FIRST_VALUE(total_device_uninstalls) OVER (PARTITION BY total_device_uninstalls_partition, country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls,
FIRST_VALUE(total_store_acquisitions) OVER (PARTITION BY total_store_acquisitions_partition, country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_acquisitions,
FIRST_VALUE(total_store_visitors) OVER (PARTITION BY total_store_visitors_partition, country, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_visitors
FROM create_partitions
), final AS (
SELECT
date_day,
country AS country_short,
COALESCE(country_codes.alternative_country_name, country_codes.country_name) AS country_long,
country_codes.region,
country_codes.sub_region,
package_name,
device_installs,
device_uninstalls,
device_upgrades,
user_installs,
user_uninstalls,
install_events,
uninstall_events,
update_events,
store_listing_acquisitions,
store_listing_visitors,
store_listing_conversion_rate,
active_devices_last_30_days,
average_rating,
rolling_total_average_rating, /* leave null if there are no ratings yet */
COALESCE(total_device_installs, 0) AS total_device_installs, /* the first day will have NULL values, let's make it 0 */
COALESCE(total_device_uninstalls, 0) AS total_device_uninstalls,
COALESCE(total_store_acquisitions, 0) AS total_store_acquisitions,
COALESCE(total_store_visitors, 0) AS total_store_visitors,
ROUND(
CAST(total_store_acquisitions AS DECIMAL(28, 6)) / NULLIF(total_store_visitors, 0),
4
) AS rolling_store_conversion_rate, /* calculate percentage and difference rolling metrics */
COALESCE(total_device_installs, 0) - COALESCE(total_device_uninstalls, 0) AS net_device_installs
FROM fill_values
LEFT JOIN country_codes
ON country_codes.country_code_alpha_2 = fill_values.country
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
active_devices_last_30_days | INT64 | The number of active devices on which your app is installed. An active device is one that has been turned on at least once in the past 30 days. |
country_short | STRING | Two-letter abbreviation of the country where the user’s Google account is registered. |
country_long | STRING | Full name of the country. |
region | STRING | Geographical region of the country. |
sub_region | STRING | Geographical subregion of the country. |
device_installs | INT64 | Devices on which users installed your app on this day. An individual user can have multiple device installs. |
device_uninstalls | INT64 | The number of devices from which users uninstalled your app on a given day. |
device_upgrades | INT64 | The number of devices from which users upgraded your app on a given day. |
user_installs | INT64 | The number of users who installed your app and did not have it installed on any other devices at the time on a given day. |
user_uninstalls | INT64 | The number of users who uninstalled your app from all of their devices on this day. |
date_day | DATE | The date on which the data is reported. |
install_events | INT64 | The number of times your app was installed, including devices on which the app had been installed previously. This does not include pre-installs or device reactivations. |
package_name | STRING | The package name of the app owning the report data. |
uninstall_events | INT64 | The number of times your app was uninstalled. This does not include inactive devices. |
update_events | INT64 | The number of times your app was updated. |
rolling_total_average_rating | FLOAT64 | Average star rating this app received across all ratings submitted up to and including the past day. For each user submitting a rating, only their most recent rating of the app is counted. |
average_rating | FLOAT64 | Average star rating this app has received across all ratings submitted on a given day. |
total_device_installs | INT64 | Cumulative number of device installs up to and including this day (aggregated on the dimension of this table). This does not take uninstalls into account and may contain duplicate devices. |
total_device_uninstalls | INT64 | Cumulative number of device uninstalls up to and including this day (aggregated on the dimension of this table). This does not take re-installs into account and may contain duplicate devices. |
net_device_installs | INT64 | Cumulative net number of device installs up to and including this day (aggregated on the dimension of this table). This is the difference of `total_device_installs` and `total_device_uninstalls`. |
store_listing_acquisitions | INT64 | The number of users that visited your store listing and installed your app, who did not have your app installed on any device. |
store_listing_conversion_rate | FLOAT64 | The percentage of store listing visitors who installed your app on a given day. Note: Does not include visits or installs from users who already have your app installed on another device |
store_listing_visitors | INT64 | The number of users that visited your store listing who did not have your app installed on any device. |
total_store_acquisitions | INT64 | Cumulative number of users that visited your store listing and installed your app, who did not have your app installed on any device prior to this. |
total_store_visitors | INT64 | Cumulative number of users that visited your store listing who did not have your app installed on any device at the time. |
rolling_store_conversion_rate | NUMERIC | Rolling percentage of store listing visitors who installed your app within a given country. The ratio of `total_store_acquisitions` to `total_store_visitors`. |
This SQL query integrates data from Google Play stats for installs and ratings by device. It performs several operations: 1. Combines install and rating data using a full outer join. 2. Calculates rolling metrics for total installs, uninstalls, and average ratings. 3. Handles null values by creating partitions and propagating non-null values across rows within each partition. 4. Computes additional metrics such as net device installs. The result is a comprehensive daily report of app performance metrics by device and package name.
IntegrationFeaturizationCleaningAggregationWITH installs AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_installs_device
), ratings AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_ratings_device
), install_metrics AS (
SELECT
*,
SUM(device_installs) OVER (PARTITION BY device, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs,
SUM(device_uninstalls) OVER (PARTITION BY device, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls
FROM installs
), device_join AS (
SELECT
COALESCE(install_metrics.date_day, ratings.date_day) AS date_day, /* these 3 columns are the grain of this model */
COALESCE(install_metrics.device, ratings.device) AS device, /* device type */
COALESCE(install_metrics.package_name, ratings.package_name) AS package_name,
COALESCE(install_metrics.active_devices_last_30_days, 0) AS active_devices_last_30_days, /* metrics based on unique devices + users */
COALESCE(install_metrics.device_installs, 0) AS device_installs,
COALESCE(install_metrics.device_uninstalls, 0) AS device_uninstalls,
COALESCE(install_metrics.device_upgrades, 0) AS device_upgrades,
COALESCE(install_metrics.user_installs, 0) AS user_installs,
COALESCE(install_metrics.user_uninstalls, 0) AS user_uninstalls,
COALESCE(install_metrics.install_events, 0) AS install_events, /* metrics based on events. a user or device can have multiple installs in one day */
COALESCE(install_metrics.uninstall_events, 0) AS uninstall_events,
COALESCE(install_metrics.update_events, 0) AS update_events,
install_metrics.total_device_installs,
install_metrics.total_device_uninstalls,
ratings.average_rating, /* this one actually isn't rolling but we won't coalesce days with no reviews to 0 rating */
ratings.rolling_total_average_rating
FROM install_metrics
FULL OUTER JOIN ratings
ON install_metrics.date_day = ratings.date_day
AND install_metrics.package_name = ratings.package_name
AND /* coalesce null device types otherwise they'll cause fanout with the full outer join */ COALESCE(install_metrics.device, 'null_device') = COALESCE(ratings.device, 'null_device') /* in the source package we aggregate all null device-type records together into one batch per day */
), create_partitions /* to backfill in days with NULL values for rolling metrics, we'll create partitions to batch them together with records that have non-null values */ /* we can't just use last_value(ignore nulls) because of postgres :/ */ AS (
SELECT
*,
SUM(CASE WHEN rolling_total_average_rating IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY device, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS rolling_total_average_rating_partition,
SUM(CASE WHEN total_device_installs IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY device, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs_partition,
SUM(CASE WHEN total_device_uninstalls IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY device, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls_partition
FROM device_join
), fill_values /* now we'll take the non-null value for each partitioned batch and propagate it across the rows included in the batch */ AS (
SELECT
date_day,
device,
package_name,
active_devices_last_30_days,
device_installs,
device_uninstalls,
device_upgrades,
user_installs,
user_uninstalls,
install_events,
uninstall_events,
update_events,
average_rating,
FIRST_VALUE(rolling_total_average_rating) OVER (PARTITION BY rolling_total_average_rating_partition, device, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS rolling_total_average_rating,
FIRST_VALUE(total_device_installs) OVER (PARTITION BY total_device_installs_partition, device, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs,
FIRST_VALUE(total_device_uninstalls) OVER (PARTITION BY total_device_uninstalls_partition, device, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls
FROM create_partitions
), final AS (
SELECT
date_day,
device,
package_name,
device_installs,
device_uninstalls,
device_upgrades,
user_installs,
user_uninstalls,
install_events,
uninstall_events,
update_events,
active_devices_last_30_days,
average_rating,
rolling_total_average_rating, /* leave null if there are no ratings yet */
COALESCE(total_device_installs, 0) AS total_device_installs, /* the first day will have NULL values, let's make it 0 */
COALESCE(total_device_uninstalls, 0) AS total_device_uninstalls,
COALESCE(total_device_installs, 0) /* calculate difference rolling metric */ - COALESCE(total_device_uninstalls, 0) AS net_device_installs
FROM fill_values
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
active_devices_last_30_days | INT64 | The number of active devices on which your app is installed. An active device is one that has been turned on at least once in the past 30 days. |
device | STRING | Type of device model. May be NULL if users do not consent to being tracked. |
device_installs | INT64 | Devices on which users installed your app on this day. An individual user can have multiple device installs. |
device_uninstalls | INT64 | The number of devices from which users uninstalled your app on a given day. |
device_upgrades | INT64 | The number of devices from which users upgraded your app on a given day. |
user_installs | INT64 | The number of users who installed your app and did not have it installed on any other devices at the time on a given day. |
user_uninstalls | INT64 | The number of users who uninstalled your app from all of their devices on this day. |
date_day | DATE | The date on which the data is reported. |
install_events | INT64 | The number of times your app was installed, including devices on which the app had been installed previously. This does not include pre-installs or device reactivations. |
package_name | STRING | The package name of the app owning the report data. |
uninstall_events | INT64 | The number of times your app was uninstalled. This does not include inactive devices. |
update_events | INT64 | The number of times your app was updated. |
rolling_total_average_rating | FLOAT64 | Average star rating this app received across all ratings submitted up to and including the past day. For each user submitting a rating, only their most recent rating of the app is counted. |
average_rating | FLOAT64 | Average star rating this app has received across all ratings submitted on a given day. |
total_device_installs | INT64 | Cumulative number of device installs up to and including this day (aggregated on the dimension of this table). This does not take uninstalls into account and may contain duplicate devices. |
total_device_uninstalls | INT64 | Cumulative number of device uninstalls up to and including this day (aggregated on the dimension of this table). This does not take re-installs into account and may contain duplicate devices. |
net_device_installs | INT64 | Cumulative net number of device installs up to and including this day (aggregated on the dimension of this table). This is the difference of `total_device_installs` and `total_device_uninstalls`. |
This SQL query combines and processes data from three different sources (installs, ratings, and crashes) related to Google Play app statistics. It joins these sources, calculates rolling metrics for installs, uninstalls, and ratings, and fills in missing values for certain metrics. The query also computes additional metrics such as net device installs and active devices. The final output provides a comprehensive daily report of various app performance metrics segmented by Android OS version and package name.
IntegrationFeaturizationAggregationCleaningWITH installs AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_installs_os_version
), ratings AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_ratings_os_version
), crashes AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_crashes_os_version
), install_metrics AS (
SELECT
*,
SUM(device_installs) OVER (PARTITION BY android_os_version, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs,
SUM(device_uninstalls) OVER (PARTITION BY android_os_version, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls
FROM installs
), app_version_join AS (
SELECT
COALESCE(install_metrics.date_day, ratings.date_day, crashes.date_day) AS date_day, /* these 3 columns are the grain of this model */
COALESCE(
install_metrics.android_os_version,
ratings.android_os_version,
crashes.android_os_version
) AS android_os_version,
COALESCE(install_metrics.package_name, ratings.package_name, crashes.package_name) AS package_name,
COALESCE(install_metrics.active_devices_last_30_days, 0) AS active_devices_last_30_days, /* metrics based on unique devices + users */
COALESCE(install_metrics.device_installs, 0) AS device_installs,
COALESCE(install_metrics.device_uninstalls, 0) AS device_uninstalls,
COALESCE(install_metrics.device_upgrades, 0) AS device_upgrades,
COALESCE(install_metrics.user_installs, 0) AS user_installs,
COALESCE(install_metrics.user_uninstalls, 0) AS user_uninstalls,
COALESCE(crashes.crashes, 0) AS crashes, /* metrics based on events. a user or device can have multiple events in one day */
COALESCE(crashes.anrs, 0) AS anrs,
COALESCE(install_metrics.install_events, 0) AS install_events,
COALESCE(install_metrics.uninstall_events, 0) AS uninstall_events,
COALESCE(install_metrics.update_events, 0) AS update_events,
install_metrics.total_device_installs,
install_metrics.total_device_uninstalls,
ratings.average_rating, /* this one actually isn't rolling but we won't coalesce days with no reviews to 0 rating */
ratings.rolling_total_average_rating
FROM install_metrics
FULL OUTER JOIN ratings
ON install_metrics.date_day = ratings.date_day
AND install_metrics.package_name = ratings.package_name
AND /* coalesce null os versions otherwise they'll cause fanout with the full outer join */ COALESCE(install_metrics.android_os_version, 'null_os_version') = COALESCE(ratings.android_os_version, 'null_os_version') /* in the source package we aggregate all null device-type records together into one batch per day */
FULL OUTER JOIN crashes
ON COALESCE(install_metrics.date_day, ratings.date_day) = crashes.date_day
AND COALESCE(install_metrics.package_name, ratings.package_name) = crashes.package_name
AND /* coalesce null countries otherwise they'll cause fanout with the full outer join */ COALESCE(install_metrics.android_os_version, ratings.android_os_version, 'null_os_version') = COALESCE(crashes.android_os_version, 'null_os_version') /* in the source package we aggregate all null device-type records together into one batch per day */
), create_partitions /* to backfill in days with NULL values for rolling metrics, we'll create partitions to batch them together with records that have non-null values */ /* we can't just use last_value(ignore nulls) because of postgres :/ */ AS (
SELECT
*,
SUM(CASE WHEN rolling_total_average_rating IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY android_os_version, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS rolling_total_average_rating_partition,
SUM(CASE WHEN total_device_installs IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY android_os_version, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs_partition,
SUM(CASE WHEN total_device_uninstalls IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY android_os_version, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls_partition
FROM app_version_join
), fill_values /* now we'll take the non-null value for each partitioned batch and propagate it across the rows included in the batch */ AS (
SELECT
date_day,
android_os_version,
package_name,
device_installs,
device_uninstalls,
device_upgrades,
user_installs,
user_uninstalls,
crashes,
anrs,
install_events,
uninstall_events,
update_events,
active_devices_last_30_days,
average_rating,
FIRST_VALUE(rolling_total_average_rating) OVER (PARTITION BY rolling_total_average_rating_partition, android_os_version, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS rolling_total_average_rating,
FIRST_VALUE(total_device_installs) OVER (PARTITION BY total_device_installs_partition, android_os_version, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs,
FIRST_VALUE(total_device_uninstalls) OVER (PARTITION BY total_device_uninstalls_partition, android_os_version, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls
FROM create_partitions
), final AS (
SELECT
date_day,
android_os_version,
package_name,
device_installs,
device_uninstalls,
device_upgrades,
user_installs,
user_uninstalls,
crashes,
anrs,
install_events,
uninstall_events,
update_events,
active_devices_last_30_days,
average_rating,
rolling_total_average_rating, /* leave null if there are no ratings yet */
COALESCE(total_device_installs, 0) AS total_device_installs, /* the first day will have NULL values, let's make it 0 */
COALESCE(total_device_uninstalls, 0) AS total_device_uninstalls,
COALESCE(total_device_installs, 0) /* calculate difference rolling metric */ - COALESCE(total_device_uninstalls, 0) AS net_device_installs
FROM fill_values
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
active_devices_last_30_days | INT64 | The number of active devices on which your app is installed. An active device is one that has been turned on at least once in the past 30 days. |
android_os_version | STRING | Operation System of the android being used. May be NULL if users do not consent to being tracked. |
device_installs | INT64 | Devices on which users installed your app on this day. An individual user can have multiple device installs. |
device_uninstalls | INT64 | The number of devices from which users uninstalled your app on a given day. |
device_upgrades | INT64 | The number of devices from which users upgraded your app on a given day. |
user_installs | INT64 | The number of users who installed your app and did not have it installed on any other devices at the time on a given day. |
user_uninstalls | INT64 | The number of users who uninstalled your app from all of their devices on this day. |
date_day | DATE | The date on which the data is reported. |
install_events | INT64 | The number of times your app was installed, including devices on which the app had been installed previously. This does not include pre-installs or device reactivations. |
package_name | STRING | The package name of the app owning the report data. |
uninstall_events | INT64 | The number of times your app was uninstalled. This does not include inactive devices. |
update_events | INT64 | The number of times your app was updated. |
anrs | INT64 | Daily application not responding (ANR) reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
crashes | INT64 | Daily crash reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
rolling_total_average_rating | FLOAT64 | Average star rating this app received across all ratings submitted up to and including the past day. For each user submitting a rating, only their most recent rating of the app is counted. |
average_rating | FLOAT64 | Average star rating this app has received across all ratings submitted on a given day. |
total_device_installs | INT64 | Cumulative number of device installs up to and including this day (aggregated on the dimension of this table). This does not take uninstalls into account and may contain duplicate devices. |
total_device_uninstalls | INT64 | Cumulative number of device uninstalls up to and including this day (aggregated on the dimension of this table). This does not take re-installs into account and may contain duplicate devices. |
net_device_installs | INT64 | Cumulative net number of device installs up to and including this day (aggregated on the dimension of this table). This is the difference of `total_device_installs` and `total_device_uninstalls`. |
This SQL query creates a comprehensive overview report for Google Play app performance. It integrates data from various sources including store performance, installs, ratings, and crashes. The query performs multiple tasks such as rolling up country-level data, calculating cumulative metrics over time, joining different data sets, filling in missing values, and computing derived metrics like conversion rates and net installs. The final output provides a daily snapshot of each app's performance across various dimensions.
IntegrationAggregationFeaturizationCleaningOtherWITH __dbt__cte__int_google_play__store_performance AS (
WITH store_performance AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__store_performance_country
), store_performance_rollup AS (
SELECT
date_day,
package_name,
SUM(store_listing_acquisitions) AS store_listing_acquisitions,
SUM(store_listing_visitors) AS store_listing_visitors
FROM store_performance
GROUP BY
1,
2
), store_performance_metrics AS (
SELECT
*,
ROUND(store_listing_acquisitions * 1.0 / NULLIF(store_listing_visitors, 0), 4) AS store_listing_conversion_rate,
SUM(store_listing_acquisitions) OVER (PARTITION BY package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_acquisitions,
SUM(store_listing_visitors) OVER (PARTITION BY package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_visitors
FROM store_performance_rollup
)
SELECT
*
FROM store_performance_metrics
), installs AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_installs_overview
), ratings AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_ratings_overview
), crashes AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_crashes_overview
), store_performance AS (
SELECT
*
FROM __dbt__cte__int_google_play__store_performance /* country rollup */
), install_metrics AS (
SELECT
*,
SUM(device_installs) OVER (PARTITION BY package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs,
SUM(device_uninstalls) OVER (PARTITION BY package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls
FROM installs
), overview_join AS (
SELECT
COALESCE(
install_metrics.date_day,
ratings.date_day,
store_performance.date_day,
crashes.date_day
) AS date_day, /* these 2 columns are the grain of this model */
COALESCE(
install_metrics.package_name,
ratings.package_name,
store_performance.package_name,
crashes.package_name
) AS package_name,
COALESCE(install_metrics.active_devices_last_30_days, 0) AS active_devices_last_30_days, /* metrics based on unique devices + users */
COALESCE(install_metrics.device_installs, 0) AS device_installs,
COALESCE(install_metrics.device_uninstalls, 0) AS device_uninstalls,
COALESCE(install_metrics.device_upgrades, 0) AS device_upgrades,
COALESCE(install_metrics.user_installs, 0) AS user_installs,
COALESCE(install_metrics.user_uninstalls, 0) AS user_uninstalls,
COALESCE(store_performance.store_listing_acquisitions, 0) AS store_listing_acquisitions,
COALESCE(store_performance.store_listing_visitors, 0) AS store_listing_visitors,
store_performance.store_listing_conversion_rate, /* not coalescing if there aren't any visitors */
COALESCE(crashes.crashes, 0) AS crashes, /* metrics based on events. a user or device can have multiple installs in one day */
COALESCE(crashes.anrs, 0) AS anrs,
COALESCE(install_metrics.install_events, 0) AS install_events,
COALESCE(install_metrics.uninstall_events, 0) AS uninstall_events,
COALESCE(install_metrics.update_events, 0) AS update_events,
install_metrics.total_device_installs,
install_metrics.total_device_uninstalls,
ratings.average_rating, /* this one actually isn't rolling but we won't coalesce days with no reviews to 0 rating. todo: move */
ratings.rolling_total_average_rating,
store_performance.total_store_acquisitions,
store_performance.total_store_visitors
FROM install_metrics
FULL OUTER JOIN ratings
ON install_metrics.date_day = ratings.date_day
AND install_metrics.package_name = ratings.package_name
FULL OUTER JOIN store_performance
ON store_performance.date_day = COALESCE(install_metrics.date_day, ratings.date_day)
AND store_performance.package_name = COALESCE(install_metrics.package_name, ratings.package_name)
FULL OUTER JOIN crashes
ON COALESCE(install_metrics.date_day, ratings.date_day, store_performance.date_day) = crashes.date_day
AND COALESCE(install_metrics.package_name, ratings.package_name, store_performance.package_name) = crashes.package_name
), create_partitions /* to backfill in days with NULL values for rolling metrics, we'll create partitions to batch them together with records that have non-null values */ /* we can't just use last_value(ignore nulls) because of postgres :/ */ AS (
SELECT
*,
SUM(CASE WHEN rolling_total_average_rating IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS rolling_total_average_rating_partition,
SUM(CASE WHEN total_device_installs IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs_partition,
SUM(CASE WHEN total_device_uninstalls IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls_partition,
SUM(CASE WHEN total_store_acquisitions IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_acquisitions_partition,
SUM(CASE WHEN total_store_visitors IS NULL THEN 0 ELSE 1 END) OVER (PARTITION BY package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_visitors_partition
FROM overview_join
), fill_values /* now we'll take the non-null value for each partitioned batch and propagate it across the rows included in the batch */ AS (
SELECT
date_day,
package_name,
active_devices_last_30_days,
device_installs,
device_uninstalls,
device_upgrades,
user_installs,
user_uninstalls,
crashes,
anrs,
install_events,
uninstall_events,
update_events,
store_listing_acquisitions,
store_listing_visitors,
store_listing_conversion_rate,
average_rating,
FIRST_VALUE(rolling_total_average_rating) OVER (PARTITION BY rolling_total_average_rating_partition, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS rolling_total_average_rating,
FIRST_VALUE(total_device_installs) OVER (PARTITION BY total_device_installs_partition, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_installs,
FIRST_VALUE(total_device_uninstalls) OVER (PARTITION BY total_device_uninstalls_partition, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_device_uninstalls,
FIRST_VALUE(total_store_acquisitions) OVER (PARTITION BY total_store_acquisitions_partition, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_acquisitions,
FIRST_VALUE(total_store_visitors) OVER (PARTITION BY total_store_visitors_partition, package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_visitors
FROM create_partitions
), final AS (
SELECT
date_day,
package_name,
device_installs,
device_uninstalls,
device_upgrades,
user_installs,
user_uninstalls,
crashes,
anrs,
install_events,
uninstall_events,
update_events,
store_listing_acquisitions,
store_listing_visitors,
store_listing_conversion_rate,
active_devices_last_30_days,
average_rating,
rolling_total_average_rating, /* leave null if there are no ratings yet */
COALESCE(total_device_installs, 0) AS total_device_installs, /* the first day will have NULL values, let's make it 0 */
COALESCE(total_device_uninstalls, 0) AS total_device_uninstalls,
COALESCE(total_store_acquisitions, 0) AS total_store_acquisitions,
COALESCE(total_store_visitors, 0) AS total_store_visitors,
ROUND(
CAST(total_store_acquisitions AS DECIMAL(28, 6)) / NULLIF(total_store_visitors, 0),
4
) AS rolling_store_conversion_rate, /* calculate percentage and difference rolling metrics */
COALESCE(total_device_installs, 0) - COALESCE(total_device_uninstalls, 0) AS net_device_installs
FROM fill_values
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
active_devices_last_30_days | INT64 | The number of active devices on which your app is installed. An active device is one that has been turned on at least once in the past 30 days. |
android_os_version | None | Operation System of the android being used. May be NULL if users do not consent to being tracked. |
device_installs | INT64 | Devices on which users installed your app on this day. An individual user can have multiple device installs. |
device_uninstalls | INT64 | The number of devices from which users uninstalled your app on a given day. |
device_upgrades | INT64 | The number of devices from which users upgraded your app on a given day. |
user_installs | INT64 | The number of users who installed your app and did not have it installed on any other devices at the time on a given day. |
user_uninstalls | INT64 | The number of users who uninstalled your app from all of their devices on this day. |
date_day | DATE | The date on which the data is reported. |
install_events | INT64 | The number of times your app was installed, including devices on which the app had been installed previously. This does not include pre-installs or device reactivations. |
package_name | STRING | The package name of the app owning the report data. |
uninstall_events | INT64 | The number of times your app was uninstalled. This does not include inactive devices. |
update_events | INT64 | The number of times your app was updated. |
anrs | INT64 | Daily application not responding (ANR) reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
crashes | INT64 | Daily crash reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
rolling_total_average_rating | FLOAT64 | Average star rating this app received across all ratings submitted up to and including the past day. For each user submitting a rating, only their most recent rating of the app is counted. |
average_rating | FLOAT64 | Average star rating this app has received across all ratings submitted on a given day. |
total_device_installs | INT64 | Cumulative number of device installs up to and including this day (aggregated on the dimension of this table). This does not take uninstalls into account and may contain duplicate devices. |
total_device_uninstalls | INT64 | Cumulative number of device uninstalls up to and including this day (aggregated on the dimension of this table). This does not take re-installs into account and may contain duplicate devices. |
net_device_installs | INT64 | Cumulative net number of device installs up to and including this day (aggregated on the dimension of this table). This is the difference of `total_device_installs` and `total_device_uninstalls`. |
store_listing_acquisitions | INT64 | The number of users that visited your store listing and installed your app, who did not have your app installed on any device. |
store_listing_conversion_rate | FLOAT64 | The percentage of store listing visitors who installed your app on a given day. Note: Does not include visits or installs from users who already have your app installed on another device |
store_listing_visitors | INT64 | The number of users that visited your store listing who did not have your app installed on any device. |
total_store_acquisitions | INT64 | Cumulative number of users that visited your store listing and installed your app, who did not have your app installed on any device prior to this. |
total_store_visitors | INT64 | Cumulative number of users that visited your store listing who did not have your app installed on any device at the time. |
rolling_store_conversion_rate | NUMERIC | Rolling percentage of store listing visitors who installed your app within a given country. The ratio of `total_store_acquisitions` to `total_store_visitors`. |
This SQL query processes Google Play store performance data. It starts by selecting all data from a staging table, then aggregates store listing acquisitions and visitors by date and package name. Finally, it calculates additional metrics such as conversion rate and cumulative totals for acquisitions and visitors. The result is a comprehensive view of store performance metrics over time for each package.
AggregationFeaturizationIntegrationWITH store_performance AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__store_performance_country
), store_performance_rollup AS (
SELECT
date_day,
package_name,
SUM(store_listing_acquisitions) AS store_listing_acquisitions,
SUM(store_listing_visitors) AS store_listing_visitors
FROM store_performance
GROUP BY
1,
2
), store_performance_metrics AS (
SELECT
*,
ROUND(store_listing_acquisitions * 1.0 / NULLIF(store_listing_visitors, 0), 4) AS store_listing_conversion_rate,
SUM(store_listing_acquisitions) OVER (PARTITION BY package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_acquisitions,
SUM(store_listing_visitors) OVER (PARTITION BY package_name ORDER BY date_day ASC rows BETWEEN UNBOUNDED preceding AND CURRENT ROW) AS total_store_visitors
FROM store_performance_rollup
)
SELECT
*
FROM store_performance_metrics
Name | Type | Comment |
---|
This SQL query processes data from a staging table for Google Play app crash statistics. It casts columns to specific data types, renames the 'date' column to 'date_day', and aggregates daily ANRs (Application Not Responding) and crashes by date, app version code, and package name.
CleaningAggregationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_crashes_app_version_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS INT) AS app_version_code,
CAST(NULL AS INT) AS daily_anrs,
CAST(NULL AS INT) AS daily_crashes,
CAST(NULL AS DATE) AS date,
CAST(NULL AS TEXT) AS package_name
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
app_version_code,
package_name,
SUM(daily_anrs) AS anrs,
SUM(daily_crashes) AS crashes
FROM fields
GROUP BY
1,
2,
3
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
app_version_code | INT64 | Integer value of the version of the app being reported on. |
anrs | INT64 | Daily application not responding (ANR) reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
crashes | INT64 | Daily crash reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
date_day | DATE | The date on which the data is reported. |
package_name | STRING | The package name of the app owning the report data. |
This SQL query selects all columns and rows from the 'stats_crashes_app_version' table in the 'google_play' schema of the 'TEST' database. It appears to be a simple data extraction query without any transformations, filters, or aggregations.
OtherSELECT
*
FROM TEST.google_play.stats_crashes_app_version
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
app_version_code | INT64 | None |
daily_crashes | INT64 | None |
daily_anrs | INT64 | None |
_fivetran_synced | STRING | None |
This SQL query processes data from a temporary staging table for Google Play stats on crashes by OS version. It casts columns to specific data types, renames the 'date' column to 'date_day', and aggregates the daily ANRs (Application Not Responding) and crashes by date, Android OS version, and package name.
CleaningAggregationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_crashes_os_version_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS TEXT) AS android_os_version,
CAST(NULL AS INT) AS daily_anrs,
CAST(NULL AS INT) AS daily_crashes,
CAST(NULL AS DATE) AS date,
CAST(NULL AS TEXT) AS package_name
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
android_os_version,
package_name,
SUM(daily_anrs) AS anrs,
SUM(daily_crashes) AS crashes
FROM fields
GROUP BY
1,
2,
3
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
android_os_version | STRING | Operation System of the android being used. May be NULL if users do not consent to being tracked. |
anrs | INT64 | Daily application not responding (ANR) reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
crashes | INT64 | Daily crash reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
date_day | DATE | The date on which the data is reported. |
package_name | STRING | The package name of the app owning the report data. |
This SQL query selects all columns and rows from the 'stats_crashes_os_version' table in the 'google_play' schema of the 'TEST' database. It appears to be a straightforward data extraction without any transformations, filters, or aggregations.
OtherSELECT
*
FROM TEST.google_play.stats_crashes_os_version
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
android_os_version | STRING | None |
daily_crashes | INT64 | None |
daily_anrs | INT64 | None |
_fivetran_synced | STRING | None |
This SQL query performs data type casting and column renaming on the 'stg_google_play__stats_crashes_overview_tmp' table. It casts the 'date' column to DATE type, renames 'daily_anrs' to 'anrs' and 'daily_crashes' to 'crashes', and selects specific columns for the final output. The query doesn't filter, deduplicate, or aggregate data, but focuses on cleaning and standardizing the data format.
CleaningWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_crashes_overview_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS INT) AS daily_anrs,
CAST(NULL AS INT) AS daily_crashes,
CAST(NULL AS DATE) AS date,
CAST(NULL AS TEXT) AS package_name
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
package_name,
daily_anrs AS anrs,
daily_crashes AS crashes,
_fivetran_synced
FROM fields
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
anrs | INT64 | Daily application not responding (ANR) reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
crashes | INT64 | Daily crash reports collected from Android devices whose users have opted in to automatically share usage and diagnostics data. |
date_day | DATE | The date on which the data is reported. |
package_name | STRING | The package name of the app owning the report data. |
_fivetran_synced | STRING | None |
This SQL query performs a simple SELECT * operation to retrieve all columns and rows from the 'stats_crashes_overview' table in the 'google_play' schema of the 'TEST' database. It's a straightforward data extraction query without any filtering, transformation, or aggregation.
OtherSELECT
*
FROM TEST.google_play.stats_crashes_overview
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
daily_crashes | INT64 | None |
daily_anrs | INT64 | None |
_fivetran_synced | STRING | None |
This SQL query performs data transformation and aggregation on Google Play Store app installation statistics. It starts by casting columns to specific data types, then aggregates various installation metrics (such as active devices, installs, uninstalls, and upgrades) by date, app version code, and package name. The query cleans and standardizes the data structure while providing summarized installation statistics for analysis.
CleaningAggregationFeaturizationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_installs_app_version_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS INT) AS active_device_installs,
CAST(NULL AS INT) AS app_version_code,
CAST(NULL AS INT) AS current_device_installs,
CAST(NULL AS INT) AS current_user_installs,
CAST(NULL AS INT) AS daily_device_installs,
CAST(NULL AS INT) AS daily_device_uninstalls,
CAST(NULL AS INT) AS daily_device_upgrades,
CAST(NULL AS INT) AS daily_user_installs,
CAST(NULL AS INT) AS daily_user_uninstalls,
CAST(NULL AS DATE) AS date,
CAST(NULL AS INT) AS install_events,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS INT) AS total_user_installs,
CAST(NULL AS INT) AS uninstall_events,
CAST(NULL AS INT) AS update_events
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
app_version_code,
package_name,
SUM(active_device_installs) AS active_devices_last_30_days,
SUM(daily_device_installs) AS device_installs,
SUM(daily_device_uninstalls) AS device_uninstalls,
SUM(daily_device_upgrades) AS device_upgrades,
SUM(daily_user_installs) AS user_installs,
SUM(daily_user_uninstalls) AS user_uninstalls,
SUM(install_events) AS install_events,
SUM(uninstall_events) AS uninstall_events,
SUM(update_events) AS update_events
FROM fields
GROUP BY
1,
2,
3
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
active_devices_last_30_days | INT64 | The number of active devices on which your app is installed. An active device is one that has been turned on at least once in the past 30 days. |
app_version_code | INT64 | Integer value of the version of the app being reported on. |
device_installs | INT64 | Devices on which users installed your app on this day. An individual user can have multiple device installs. |
device_uninstalls | INT64 | The number of devices from which users uninstalled your app on a given day. |
device_upgrades | INT64 | The number of devices from which users upgraded your app on a given day. |
user_installs | INT64 | The number of users who installed your app and did not have it installed on any other devices at the time on a given day. |
user_uninstalls | INT64 | The number of users who uninstalled your app from all of their devices on this day. |
date_day | DATE | The date on which the data is reported. |
install_events | INT64 | The number of times your app was installed, including devices on which the app had been installed previously. This does not include pre-installs or device reactivations. |
package_name | STRING | The package name of the app owning the report data. |
uninstall_events | INT64 | The number of times your app was uninstalled. This does not include inactive devices. |
update_events | INT64 | The number of times your app was updated. |
This SQL query selects all columns and rows from the table 'stats_installs_app_version' in the 'google_play' schema of the 'TEST' database. It's a simple SELECT * statement without any filtering, transformation, or aggregation.
OtherSELECT
*
FROM TEST.google_play.stats_installs_app_version
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
app_version_code | INT64 | None |
current_device_installs | INT64 | None |
daily_device_installs | INT64 | None |
daily_device_uninstalls | INT64 | None |
daily_device_upgrades | INT64 | None |
current_user_installs | INT64 | None |
total_user_installs | INT64 | None |
daily_user_installs | INT64 | None |
daily_user_uninstalls | INT64 | None |
_fivetran_synced | STRING | None |
active_device_installs | INT64 | None |
install_events | INT64 | None |
update_events | INT64 | None |
uninstall_events | INT64 | None |
This SQL query transforms and aggregates data from a Google Play stats table. It first casts all columns to specific data types, then aggregates various install and uninstall metrics by date, country, and package name. The query performs data type conversion, renames some columns, and calculates sums for several metrics.
CleaningAggregationFeaturizationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_installs_country_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS INT) AS active_device_installs,
CAST(NULL AS TEXT) AS country,
CAST(NULL AS INT) AS current_device_installs,
CAST(NULL AS INT) AS current_user_installs,
CAST(NULL AS INT) AS daily_device_installs,
CAST(NULL AS INT) AS daily_device_uninstalls,
CAST(NULL AS INT) AS daily_device_upgrades,
CAST(NULL AS INT) AS daily_user_installs,
CAST(NULL AS INT) AS daily_user_uninstalls,
CAST(NULL AS DATE) AS date,
CAST(NULL AS INT) AS install_events,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS INT) AS total_user_installs,
CAST(NULL AS INT) AS uninstall_events,
CAST(NULL AS INT) AS update_events
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
country,
package_name,
SUM(active_device_installs) AS active_devices_last_30_days,
SUM(daily_device_installs) AS device_installs,
SUM(daily_device_uninstalls) AS device_uninstalls,
SUM(daily_device_upgrades) AS device_upgrades,
SUM(daily_user_installs) AS user_installs,
SUM(daily_user_uninstalls) AS user_uninstalls,
SUM(install_events) AS install_events,
SUM(uninstall_events) AS uninstall_events,
SUM(update_events) AS update_events
FROM fields
GROUP BY
1,
2,
3
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
active_devices_last_30_days | INT64 | The number of active devices on which your app is installed. An active device is one that has been turned on at least once in the past 30 days. |
country | STRING | Two-letter abbreviation of the country where the user’s Google account is registered. |
device_installs | INT64 | Devices on which users installed your app on this day. An individual user can have multiple device installs. |
device_uninstalls | INT64 | The number of devices from which users uninstalled your app on a given day. |
device_upgrades | INT64 | The number of devices from which users upgraded your app on a given day. |
user_installs | INT64 | The number of users who installed your app and did not have it installed on any other devices at the time on a given day. |
user_uninstalls | INT64 | The number of users who uninstalled your app from all of their devices on this day. |
date_day | DATE | The date on which the data is reported. |
install_events | INT64 | The number of times your app was installed, including devices on which the app had been installed previously. This does not include pre-installs or device reactivations. |
package_name | STRING | The package name of the app owning the report data. |
uninstall_events | INT64 | The number of times your app was uninstalled. This does not include inactive devices. |
update_events | INT64 | The number of times your app was updated. |
This SQL query selects all columns from the 'stats_installs_country' table in the 'google_play' schema of the 'TEST' database. It's a simple SELECT * statement that retrieves all data from the specified table without any modifications, filters, or transformations.
OtherSELECT
*
FROM TEST.google_play.stats_installs_country
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
country | STRING | None |
current_device_installs | INT64 | None |
daily_device_installs | INT64 | None |
daily_device_uninstalls | INT64 | None |
daily_device_upgrades | INT64 | None |
current_user_installs | INT64 | None |
total_user_installs | INT64 | None |
daily_user_installs | INT64 | None |
daily_user_uninstalls | INT64 | None |
_fivetran_synced | STRING | None |
active_device_installs | INT64 | None |
install_events | INT64 | None |
update_events | INT64 | None |
uninstall_events | INT64 | None |
This SQL query processes data from a Google Play statistics table. It starts by casting columns to specific data types, then aggregates various installation and uninstallation metrics by date, device type, and package name. The query summarizes daily device and user installation activities, including installs, uninstalls, upgrades, and active devices over the last 30 days.
CleaningAggregationFeaturizationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_installs_device_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS INT) AS active_device_installs,
CAST(NULL AS INT) AS current_device_installs,
CAST(NULL AS INT) AS current_user_installs,
CAST(NULL AS INT) AS daily_device_installs,
CAST(NULL AS INT) AS daily_device_uninstalls,
CAST(NULL AS INT) AS daily_device_upgrades,
CAST(NULL AS INT) AS daily_user_installs,
CAST(NULL AS INT) AS daily_user_uninstalls,
CAST(NULL AS DATE) AS date,
CAST(NULL AS TEXT) AS device,
CAST(NULL AS INT) AS install_events,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS INT) AS total_user_installs,
CAST(NULL AS INT) AS uninstall_events,
CAST(NULL AS INT) AS update_events
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
device, /* device_type */
package_name,
SUM(active_device_installs) AS active_devices_last_30_days,
SUM(daily_device_installs) AS device_installs,
SUM(daily_device_uninstalls) AS device_uninstalls,
SUM(daily_device_upgrades) AS device_upgrades,
SUM(daily_user_installs) AS user_installs,
SUM(daily_user_uninstalls) AS user_uninstalls,
SUM(install_events) AS install_events,
SUM(uninstall_events) AS uninstall_events,
SUM(update_events) AS update_events
FROM fields
GROUP BY
1,
2,
3
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
active_devices_last_30_days | INT64 | The number of active devices on which your app is installed. An active device is one that has been turned on at least once in the past 30 days. |
device_installs | INT64 | Devices on which users installed your app on this day. An individual user can have multiple device installs. |
device_uninstalls | INT64 | The number of devices from which users uninstalled your app on a given day. |
device_upgrades | INT64 | The number of devices from which users upgraded your app on a given day. |
user_installs | INT64 | The number of users who installed your app and did not have it installed on any other devices at the time on a given day. |
user_uninstalls | INT64 | The number of users who uninstalled your app from all of their devices on this day. |
date_day | DATE | The date on which the data is reported. |
device | STRING | Type of device model. May be NULL if users do not consent to being tracked. |
install_events | INT64 | The number of times your app was installed, including devices on which the app had been installed previously. This does not include pre-installs or device reactivations. |
package_name | STRING | The package name of the app owning the report data. |
uninstall_events | INT64 | The number of times your app was uninstalled. This does not include inactive devices. |
update_events | INT64 | The number of times your app was updated. |
This SQL query selects all columns from the 'stats_installs_device' table in the 'google_play' schema of the 'TEST' database. It appears to be a simple data extraction query without any filtering, transformation, or aggregation.
OtherSELECT
*
FROM TEST.google_play.stats_installs_device
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
device | STRING | None |
current_device_installs | INT64 | None |
daily_device_installs | INT64 | None |
daily_device_uninstalls | INT64 | None |
daily_device_upgrades | INT64 | None |
current_user_installs | INT64 | None |
total_user_installs | INT64 | None |
daily_user_installs | INT64 | None |
daily_user_uninstalls | INT64 | None |
_fivetran_synced | STRING | None |
active_device_installs | INT64 | None |
install_events | INT64 | None |
update_events | INT64 | None |
uninstall_events | INT64 | None |
This SQL query processes data from a Google Play stats table. It starts by casting columns to specific data types, then aggregates various install and uninstall metrics by date, package name, and Android OS version. The query performs summations on multiple metrics such as active devices, installs, uninstalls, and upgrades.
CleaningAggregationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_installs_os_version_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS INT) AS active_device_installs,
CAST(NULL AS TEXT) AS android_os_version,
CAST(NULL AS INT) AS current_device_installs,
CAST(NULL AS INT) AS current_user_installs,
CAST(NULL AS INT) AS daily_device_installs,
CAST(NULL AS INT) AS daily_device_uninstalls,
CAST(NULL AS INT) AS daily_device_upgrades,
CAST(NULL AS INT) AS daily_user_installs,
CAST(NULL AS INT) AS daily_user_uninstalls,
CAST(NULL AS DATE) AS date,
CAST(NULL AS INT) AS install_events,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS INT) AS total_user_installs,
CAST(NULL AS INT) AS uninstall_events,
CAST(NULL AS INT) AS update_events
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
package_name,
android_os_version,
SUM(active_device_installs) AS active_devices_last_30_days,
SUM(daily_device_installs) AS device_installs,
SUM(daily_device_uninstalls) AS device_uninstalls,
SUM(daily_device_upgrades) AS device_upgrades,
SUM(daily_user_installs) AS user_installs,
SUM(daily_user_uninstalls) AS user_uninstalls,
SUM(install_events) AS install_events,
SUM(uninstall_events) AS uninstall_events,
SUM(update_events) AS update_events
FROM fields
GROUP BY
1,
2,
3
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
active_devices_last_30_days | INT64 | The number of active devices on which your app is installed. An active device is one that has been turned on at least once in the past 30 days. |
android_os_version | STRING | Operation System of the android being used. May be NULL if users do not consent to being tracked. |
device_installs | INT64 | Devices on which users installed your app on this day. An individual user can have multiple device installs. |
device_uninstalls | INT64 | The number of devices from which users uninstalled your app on a given day. |
device_upgrades | INT64 | The number of devices from which users upgraded your app on a given day. |
user_installs | INT64 | The number of users who installed your app and did not have it installed on any other devices at the time on a given day. |
user_uninstalls | INT64 | The number of users who uninstalled your app from all of their devices on this day. |
date_day | DATE | The date on which the data is reported. |
install_events | INT64 | The number of times your app was installed, including devices on which the app had been installed previously. This does not include pre-installs or device reactivations. |
package_name | STRING | The package name of the app owning the report data. |
uninstall_events | INT64 | The number of times your app was uninstalled. This does not include inactive devices. |
update_events | INT64 | The number of times your app was updated. |
This SQL query selects all columns from the 'stats_installs_os_version' table in the 'google_play' schema of the 'TEST' database. It appears to be a simple selection of the entire table without any filtering, transformation, or aggregation.
OtherSELECT
*
FROM TEST.google_play.stats_installs_os_version
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
android_os_version | STRING | None |
current_device_installs | INT64 | None |
daily_device_installs | INT64 | None |
daily_device_uninstalls | INT64 | None |
daily_device_upgrades | INT64 | None |
current_user_installs | INT64 | None |
total_user_installs | INT64 | None |
daily_user_installs | INT64 | None |
daily_user_uninstalls | INT64 | None |
_fivetran_synced | STRING | None |
active_device_installs | INT64 | None |
install_events | INT64 | None |
update_events | INT64 | None |
uninstall_events | INT64 | None |
This SQL query stages data from a temporary table, casts various fields to specific data types, renames some columns, and selects a subset of columns for the final output. It primarily focuses on Google Play app install statistics, including device installs, uninstalls, upgrades, and user-related metrics.
CleaningFeaturizationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_installs_overview_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS INT) AS active_device_installs,
CAST(NULL AS INT) AS current_device_installs,
CAST(NULL AS INT) AS current_user_installs,
CAST(NULL AS INT) AS daily_device_installs,
CAST(NULL AS INT) AS daily_device_uninstalls,
CAST(NULL AS INT) AS daily_device_upgrades,
CAST(NULL AS INT) AS daily_user_installs,
CAST(NULL AS INT) AS daily_user_uninstalls,
CAST(NULL AS DATE) AS date,
CAST(NULL AS INT) AS install_events,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS INT) AS total_user_installs,
CAST(NULL AS INT) AS uninstall_events,
CAST(NULL AS INT) AS update_events
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
package_name,
active_device_installs AS active_devices_last_30_days,
daily_device_installs AS device_installs,
daily_device_uninstalls AS device_uninstalls,
daily_device_upgrades AS device_upgrades,
daily_user_installs AS user_installs,
daily_user_uninstalls AS user_uninstalls,
install_events,
uninstall_events,
update_events,
_fivetran_synced
FROM fields
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
active_devices_last_30_days | INT64 | The number of active devices on which your app is installed. An active device is one that has been turned on at least once in the past 30 days. |
device_installs | INT64 | Devices on which users installed your app on this day. An individual user can have multiple device installs. |
device_uninstalls | INT64 | The number of devices from which users uninstalled your app on a given day. |
device_upgrades | INT64 | The number of devices from which users upgraded your app on a given day. |
user_installs | INT64 | The number of users who installed your app and did not have it installed on any other devices at the time on a given day. |
user_uninstalls | INT64 | The number of users who uninstalled your app from all of their devices on this day. |
date_day | DATE | The date on which the data is reported. |
install_events | INT64 | The number of times your app was installed, including devices on which the app had been installed previously. This does not include pre-installs or device reactivations. |
package_name | STRING | The package name of the app owning the report data. |
uninstall_events | INT64 | The number of times your app was uninstalled. This does not include inactive devices. |
update_events | INT64 | The number of times your app was updated. |
_fivetran_synced | STRING | None |
This SQL query selects all columns and rows from the 'stats_installs_overview' table in the 'google_play' schema of the 'TEST' database. It appears to be a simple data extraction query without any transformations, filters, or aggregations.
OtherSELECT
*
FROM TEST.google_play.stats_installs_overview
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
current_device_installs | INT64 | None |
daily_device_installs | INT64 | None |
daily_device_uninstalls | INT64 | None |
daily_device_upgrades | INT64 | None |
current_user_installs | INT64 | None |
total_user_installs | INT64 | None |
daily_user_installs | INT64 | None |
daily_user_uninstalls | INT64 | None |
_fivetran_synced | STRING | None |
active_device_installs | INT64 | None |
install_events | INT64 | None |
update_events | INT64 | None |
uninstall_events | INT64 | None |
This SQL query performs data cleaning and transformation on the Google Play app ratings data. It casts columns to specific data types, handles null values, and renames some columns. The query also converts the 'daily_average_rating' from text to float, excluding 'NA' values, and applies conditional logic to filter out null app_version_code entries for certain columns.
CleaningFeaturizationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_ratings_app_version_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS INT) AS app_version_code,
CAST(NULL AS TEXT) AS daily_average_rating,
CAST(NULL AS DATE) AS date,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS FLOAT) AS total_average_rating
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
app_version_code,
package_name,
CASE
WHEN app_version_code IS NULL
THEN NULL
ELSE CAST(NULLIF(CAST(daily_average_rating AS TEXT), 'NA') AS FLOAT)
END AS average_rating,
CASE WHEN app_version_code IS NULL THEN NULL ELSE total_average_rating END AS rolling_total_average_rating
FROM fields
GROUP BY
1,
2,
3,
4,
5
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
app_version_code | INT64 | Integer value of the version of the app being reported on. |
average_rating | FLOAT64 | Average star rating this app has received across all ratings submitted on a given day. |
date_day | DATE | The date on which the data is reported. |
package_name | STRING | The package name of the app owning the report data. |
rolling_total_average_rating | FLOAT64 | Average star rating this app received across all ratings submitted up to and including the past day. For each user submitting a rating, only their most recent rating of the app is counted. |
This SQL query selects all columns from the 'stats_ratings_app_version' table in the 'google_play' schema of the 'TEST' database. It's a straightforward SELECT * statement without any filtering, transformations, or joins.
OtherSELECT
*
FROM TEST.google_play.stats_ratings_app_version
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
app_version_code | INT64 | None |
daily_average_rating | STRING | None |
total_average_rating | FLOAT64 | None |
_fivetran_synced | STRING | None |
This SQL query processes data from a Google Play stats ratings source. It casts various fields to specific data types, handles null values, and performs some data cleaning. The query specifically focuses on date, country, package name, daily average rating, and total average rating. It converts the daily average rating from text to float, excluding 'NA' values, and only includes ratings when a country is specified. The final output includes a date_day, country, package_name, average_rating, and rolling_total_average_rating.
CleaningFeaturizationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_ratings_country_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS TEXT) AS country,
CAST(NULL AS TEXT) AS daily_average_rating,
CAST(NULL AS DATE) AS date,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS FLOAT) AS total_average_rating
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
country,
package_name,
CASE
WHEN country IS NULL
THEN NULL
ELSE CAST(NULLIF(CAST(daily_average_rating AS TEXT), 'NA') AS FLOAT)
END AS average_rating,
CASE WHEN country IS NULL THEN NULL ELSE total_average_rating END AS rolling_total_average_rating
FROM fields
GROUP BY
1,
2,
3,
4,
5
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
country | STRING | Two-letter abbreviation of the country where the user’s Google account is registered. |
average_rating | FLOAT64 | Average star rating this app has received across all ratings submitted on a given day. |
date_day | DATE | The date on which the data is reported. |
package_name | STRING | The package name of the app owning the report data. |
rolling_total_average_rating | FLOAT64 | Average star rating this app received across all ratings submitted up to and including the past day. For each user submitting a rating, only their most recent rating of the app is counted. |
This SQL query selects all columns and rows from the table 'stats_ratings_country' in the 'google_play' schema of the 'TEST' database. It's a straightforward SELECT * statement without any filtering, transformation, or aggregation.
OtherSELECT
*
FROM TEST.google_play.stats_ratings_country
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
country | STRING | None |
daily_average_rating | STRING | None |
total_average_rating | FLOAT64 | None |
_fivetran_synced | STRING | None |
This SQL query performs data cleaning and transformation on the Google Play stats ratings device data. It casts various fields to appropriate data types, handles null values, and reformats the average rating and total average rating fields. The query also renames some columns for clarity.
CleaningFeaturizationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_ratings_device_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS TEXT) AS daily_average_rating,
CAST(NULL AS DATE) AS date,
CAST(NULL AS TEXT) AS device,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS FLOAT) AS total_average_rating
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
device,
package_name,
CASE
WHEN device IS NULL
THEN NULL
ELSE CAST(NULLIF(CAST(daily_average_rating AS TEXT), 'NA') AS FLOAT)
END AS average_rating,
CASE WHEN device IS NULL THEN NULL ELSE total_average_rating END AS rolling_total_average_rating
FROM fields
GROUP BY
1,
2,
3,
4,
5
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
average_rating | FLOAT64 | Average star rating this app has received across all ratings submitted on a given day. |
date_day | DATE | The date on which the data is reported. |
device | STRING | Type of device model. May be NULL if users do not consent to being tracked. |
package_name | STRING | The package name of the app owning the report data. |
rolling_total_average_rating | FLOAT64 | Average star rating this app received across all ratings submitted up to and including the past day. For each user submitting a rating, only their most recent rating of the app is counted. |
This SQL query selects all columns and rows from the 'stats_ratings_device' table in the 'google_play' schema of the 'TEST' database. It appears to be a simple data extraction query without any transformations, filters, or aggregations.
OtherSELECT
*
FROM TEST.google_play.stats_ratings_device
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
device | STRING | None |
daily_average_rating | STRING | None |
total_average_rating | FLOAT64 | None |
_fivetran_synced | STRING | None |
This SQL query performs data cleaning and transformation on the Google Play Store ratings data. It casts columns to appropriate data types, handles null values, and renames some columns. The query also performs some data cleaning by converting 'NA' values to NULL in the daily_average_rating column and conditionally setting values to NULL based on the android_os_version column.
CleaningFeaturizationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_ratings_os_version_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS TEXT) AS android_os_version,
CAST(NULL AS TEXT) AS daily_average_rating,
CAST(NULL AS DATE) AS date,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS FLOAT) AS total_average_rating
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
android_os_version,
package_name,
CASE
WHEN android_os_version IS NULL
THEN NULL
ELSE CAST(NULLIF(CAST(daily_average_rating AS TEXT), 'NA') AS FLOAT)
END AS average_rating,
CASE WHEN android_os_version IS NULL THEN NULL ELSE total_average_rating END AS rolling_total_average_rating
FROM fields
GROUP BY
1,
2,
3,
4,
5
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
average_rating | FLOAT64 | Average star rating this app has received across all ratings submitted on a given day. |
date_day | DATE | The date on which the data is reported. |
android_os_version | STRING | Type of device model. May be NULL if users do not consent to being tracked. |
package_name | STRING | The package name of the app owning the report data. |
rolling_total_average_rating | FLOAT64 | Average star rating this app received across all ratings submitted up to and including the past day. For each user submitting a rating, only their most recent rating of the app is counted. |
This SQL query selects all columns and rows from the table 'stats_ratings_os_version' in the 'google_play' schema of the 'TEST' database. It's a simple SELECT * statement that retrieves all data from the specified table without any transformation, filtering, or manipulation.
OtherSELECT
*
FROM TEST.google_play.stats_ratings_os_version
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
android_os_version | STRING | None |
daily_average_rating | STRING | None |
total_average_rating | FLOAT64 | None |
_fivetran_synced | STRING | None |
This SQL query performs data type casting and cleaning on the 'stg_google_play__stats_ratings_overview' table. It casts the 'date' column to DATE type, converts the 'daily_average_rating' from TEXT to FLOAT (replacing 'NA' values with NULL), and renames some columns. The query also selects specific columns for the final output.
CleaningOtherWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__stats_ratings_overview_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS TEXT) AS daily_average_rating,
CAST(NULL AS DATE) AS date,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS FLOAT) AS total_average_rating
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
package_name,
CAST(NULLIF(CAST(daily_average_rating AS TEXT), 'NA') AS FLOAT) AS average_rating,
total_average_rating AS rolling_total_average_rating,
_fivetran_synced
FROM fields
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
average_rating | FLOAT64 | Average star rating this app has received across all ratings submitted on a given day. |
date_day | DATE | The date on which the data is reported. |
package_name | STRING | The package name of the app owning the report data. |
rolling_total_average_rating | FLOAT64 | Average star rating this app received across all ratings submitted up to and including the past day. For each user submitting a rating, only their most recent rating of the app is counted. |
_fivetran_synced | STRING | None |
This SQL query selects all columns and rows from the 'stats_ratings_overview' table in the 'google_play' schema of the 'TEST' database. It's a simple SELECT * statement that retrieves all data from the specified table without any modification, filtering, or transformation.
OtherSELECT
*
FROM TEST.google_play.stats_ratings_overview
Name | Type | Comment |
---|---|---|
a | INT64 | None |
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
daily_average_rating | STRING | None |
total_average_rating | FLOAT64 | None |
_fivetran_synced | STRING | None |
This SQL query processes data from a Google Play store performance table. It starts by casting columns to specific data types, then aggregates the data by date, country region, and package name. The query calculates the sum of store listing acquisitions and visitors, and the average conversion rate for each group.
CleaningAggregationWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__store_performance_country_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS TEXT) AS country_region,
CAST(NULL AS DATE) AS date,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS INT) AS store_listing_acquisitions,
CAST(NULL AS FLOAT) AS store_listing_conversion_rate,
CAST(NULL AS INT) AS store_listing_visitors
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
country_region,
package_name,
SUM(store_listing_acquisitions) AS store_listing_acquisitions,
AVG(store_listing_conversion_rate) AS store_listing_conversion_rate,
SUM(store_listing_visitors) AS store_listing_visitors
FROM fields
GROUP BY
1,
2,
3
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
country_region | STRING | Two-letter abbreviation of the country or region where the user’s Google account is registered. |
date_day | DATE | The date on which the data is reported. |
package_name | STRING | The package name of the app owning the report data. |
store_listing_acquisitions | INT64 | The number of users that visited your store listing and installed your app, who did not have your app installed on any device. |
store_listing_conversion_rate | FLOAT64 | The percentage of store listing visitors who installed your app on a given day. Note: Does not include visits or installs from users who already have your app installed on another device |
store_listing_visitors | INT64 | The number of users that visited your store listing who did not have your app installed on any device. |
This SQL query selects all columns from the 'stats_store_performance_country' table in the 'google_play' schema of the 'TEST' database. It appears to be a simple data extraction query without any transformation, filtering, or aggregation.
OtherSELECT
*
FROM TEST.google_play.stats_store_performance_country
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
country_region | STRING | None |
store_listing_acquisitions | INT64 | None |
store_listing_visitors | INT64 | None |
store_listing_conversion_rate | FLOAT64 | None |
_fivetran_synced | STRING | None |
This SQL query performs several operations on data from the Google Play Store performance source. It starts by selecting all columns from a temporary table, then casts them to specific data types. The query then renames some columns, creates a surrogate key using MD5 hash of multiple columns, and selects the final set of columns. The purpose seems to be standardizing the data structure and creating a unique identifier for each record.
CleaningFeaturizationOtherWITH base AS (
SELECT
*
FROM TEST.PUBLIC_google_play_source.stg_google_play__store_performance_source_tmp
), fields AS (
SELECT
CAST(NULL AS TEXT) AS _file,
CAST(NULL AS TIMESTAMP) AS _fivetran_synced,
CAST(NULL AS INT) AS _line,
CAST(NULL AS TIMESTAMP) AS _modified,
CAST(NULL AS DATE) AS date,
CAST(NULL AS TEXT) AS package_name,
CAST(NULL AS TEXT) AS search_term,
CAST(NULL AS INT) AS store_listing_acquisitions,
CAST(NULL AS FLOAT) AS store_listing_conversion_rate,
CAST(NULL AS INT) AS store_listing_visitors,
CAST(NULL AS TEXT) AS traffic_source,
CAST(NULL AS TEXT) AS utm_campaign,
CAST(NULL AS TEXT) AS utm_source
FROM base
), final AS (
SELECT
CAST(date AS DATE) AS date_day,
package_name,
traffic_source,
search_term,
utm_campaign,
utm_source,
store_listing_acquisitions,
store_listing_conversion_rate,
store_listing_visitors,
MD5(
CAST(COALESCE(CAST(date AS TEXT), '_dbt_utils_surrogate_key_null_') || '-' || COALESCE(CAST(package_name AS TEXT), '_dbt_utils_surrogate_key_null_') || '-' || COALESCE(CAST(traffic_source AS TEXT), '_dbt_utils_surrogate_key_null_') || '-' || COALESCE(CAST(search_term AS TEXT), '_dbt_utils_surrogate_key_null_') || '-' || COALESCE(CAST(utm_campaign AS TEXT), '_dbt_utils_surrogate_key_null_') || '-' || COALESCE(CAST(utm_source AS TEXT), '_dbt_utils_surrogate_key_null_') AS TEXT)
) AS traffic_source_unique_key, /* make a surrogate key as the PK involves quite a few columns */
_fivetran_synced
FROM fields
)
SELECT
*
FROM final
Name | Type | Comment |
---|---|---|
traffic_source_unique_key | STRING | Surrogate key hashed on `date_day`, `package_name`, `traffic_source`, `search_term`, `utm_campaign`, and `utm_source`. |
date_day | DATE | The date on which the data is reported. |
package_name | STRING | The package name of the app owning the report data. |
search_term | STRING | The term the user searched for before navigating to your store listing. Only available when the traffic source is Google Play search. Note: Can be Other this value does not reach certain minimum thresholds |
store_listing_acquisitions | INT64 | The number of users that visited your store listing and installed your app, who did not have your app installed on any device. |
store_listing_conversion_rate | FLOAT64 | The percentage of store listing visitors who installed your app on a given day. Note: Does not include visits or installs from users who already have your app installed on another device |
store_listing_visitors | INT64 | The number of users that visited your store listing who did not have your app installed on any device. |
traffic_source | STRING | How the user got to your store listing: Google Play search, Third-party referral, Google Play explore, or Other. |
utm_campaign | STRING | The value of the utm_campaign URL parameter in deep links to your store listing. Only available when the traffic source is a third-party referral. Note: Can be Other this value does not reach certain minimum thresholds |
utm_source | STRING | The value of the utm_source URL parameter in deep links to your store listing. Only available when the traffic source is a third-party referral. Note: Can be Other this value does not reach certain minimum thresholds |
_fivetran_synced | STRING | None |
This SQL query selects all columns from the 'stats_store_performance_traffic_source' table in the 'google_play' schema of the 'TEST' database. It appears to be a simple SELECT * statement without any filtering, transformation, or aggregation.
OtherSELECT
*
FROM TEST.google_play.stats_store_performance_traffic_source
Name | Type | Comment |
---|---|---|
_file | STRING | None |
_line | INT64 | None |
_modified | STRING | None |
date | DATE | None |
package_name | STRING | None |
traffic_source | STRING | None |
search_term | STRING | None |
utm_source | STRING | None |
utm_campaign | STRING | None |
store_listing_acquisitions | INT64 | None |
store_listing_visitors | INT64 | None |
store_listing_conversion_rate | FLOAT64 | None |
_fivetran_synced | STRING | None |