Certainly, here's the complete R code that the attorney used to uncover the patterns of insider trading:
```r
# Load necessary libraries
library(tidyverse)
library(readr)
library(lubridate)
# Import data (replace 'path_to_file' with the actual path to your CSV file).
# The steps below use these columns: Timestamp, Sender, Recipient, Content,
# Subject.
communications <- read_csv("path_to_file/communications.csv")

# Data cleaning and tidying ----
# Timestamps are parsed from "month/day/year hour:minute" text. The time zone
# is pinned to UTC so the derived calendar Date always equals the date written
# in the file: without an explicit tz the value is parsed in the session's
# local zone, and as.Date() may then convert to UTC and shift late-evening or
# early-morning messages onto the neighbouring day.
# NOTE(review): this keeps the recorded wall-clock time as-is (no zone
# conversion); confirm the file does not mix time zones.
communications_tidy <- communications %>%
  mutate(
    Timestamp = as.POSIXct(Timestamp, format = "%m/%d/%Y %H:%M", tz = "UTC"),
    Date = as.Date(Timestamp),
    Time = format(Timestamp, "%H:%M")
  ) %>%
  select(Sender, Recipient, Date, Time, Content, Subject)
# Keyword flagging ----
# Mark each message whose Content contains "urgent", "confidential" or
# "insider" (case-insensitive). grepl() yields FALSE for missing Content.
communications_features <- mutate(
  communications_tidy,
  is_suspicious = grepl(
    pattern = "urgent|confidential|insider",
    x = Content,
    ignore.case = TRUE
  )
)

# Per-day roll-up ----
# One row per Date: how many messages were sent, and whether at least one of
# them was flagged above.
suspicious_days <- summarise(
  group_by(communications_features, Date),
  total_messages = n(),
  any_suspicious = any(is_suspicious)
)
# Trade activity ----
# Placeholder 'trades' table: one row per date with a trade_activity label.
# Replace this with the real trade records (same two columns) when available.
# The dates are written month/day/year, so the format must be given
# explicitly: without it as.Date() matches its default "%Y/%m/%d" attempt and
# silently produces dates in year 0003, which never match the communication
# dates in the join below.
trades <- tibble(
  Date = as.Date(
    c("3/8/2024", "3/9/2024", "3/10/2024"),
    format = "%m/%d/%Y"
  ),
  trade_activity = c("Normal", "Suspicious", "Suspicious")
)

# Combine with the communication data ----
# Keep every communication day and attach its trade label. The label from
# 'trades' takes precedence; it is no longer overwritten after the join.
# NOTE(review): days with no trade record fall back to the communication
# keyword flag so every point still gets a colour -- confirm that this is the
# labelling wanted for such days.
combined_data <- suspicious_days %>%
  left_join(trades, by = "Date") %>%
  mutate(
    trade_activity = coalesce(
      trade_activity,
      if_else(any_suspicious, "Suspicious", "Normal")
    )
  )
# Final visualization ----
# Bars show the number of messages per date; a point on each bar is coloured
# by that date's trade_activity label (grey = Normal, red = Suspicious).
final_plot <- combined_data %>%
  ggplot(aes(Date, total_messages)) +
  geom_col() +
  geom_point(aes(color = trade_activity), size = 4) +
  # Integer tick marks from zero up to the busiest day's message count.
  scale_y_continuous(
    breaks = seq(
      from = 0,
      to = max(combined_data[["total_messages"]], na.rm = TRUE),
      by = 1
    )
  ) +
  scale_color_manual(values = c("Normal" = "grey", "Suspicious" = "red")) +
  expand_limits(y = 0) +
  labs(
    x = "Date",
    y = "Total Messages",
    color = "Trade Activity",
    title = "Communication Timeline with Suspicious Trades"
  ) +
  theme_minimal()

# Render the plot on the active graphics device
print(final_plot)
```
Remember to replace "path_to_file/communications.csv" with the actual path to the communications data file, and the placeholder `trades` table with your real trade records. After running this script, you should see a bar chart of communication frequencies by date, with suspicious activity highlighted.