Scripts
These scripts can be useful to review the data on the server
Number of ACLED events by country
Number of ACLED events by country
SELECT e.country,e.iso3, count(*),max(event_date) as last_date
FROM draft_schema.acled_events as a
left join draft_schema.equivalency_table as e
on a.iso=e.iso3166num
group by e.country,e.iso3
Check and remove duplicated events
DELETE FROM draft_schema.acled_events
WHERE (event_id_cnty, timestamp) NOT IN (
SELECT event_id_cnty, MAX(timestamp)
FROM draft_schema.acled_events
GROUP BY event_id_cnty
);
#Remove duplication
WITH latest_events AS (
SELECT event_id_cnty, MAX(timestamp) AS latest_timestamp
FROM draft_schema.acled_events
GROUP BY event_id_cnty
)
DELETE FROM draft_schema.acled_events
USING latest_events
WHERE draft_schema.acled_events.event_id_cnty = latest_events.event_id_cnty
AND draft_schema.acled_events.timestamp < latest_events.latest_timestamp;
#Check duplication
SELECT event_id_cnty
FROM draft_schema.acled_events
GROUP BY event_id_cnty
HAVING COUNT(*) > 1;
# code
DELETE FROM draft_schema.acled_events a
USING (
SELECT ctid,
ROW_NUMBER() OVER (PARTITION BY event_id_cnty ORDER BY event_id_cnty) AS rn
FROM draft_schema.acled_events
) b
WHERE a.ctid = b.ctid AND b.rn > 1;
Latest timestamps
SELECT e.country,e.iso3, MAX(timestamp) as latest_timestamp
FROM draft_schema.acled_events as a
left join draft_schema.equivalency_table as e
on a.iso=e.iso3166num
group by e.country,e.iso3