Fighting Insider Trading: R Code

				
Certainly, here's the complete R code that the attorney used to uncover the patterns of insider trading:

```r
# Load necessary libraries
library(tidyverse)
library(readr)
library(lubridate)

# Read the raw communications log from CSV
# (swap 'path_to_file' for the real directory that holds the file)
communications <- read_csv(file = "path_to_file/communications.csv")

# Data Cleaning and Tidying
# Parse the "month/day/year hour:minute" timestamps, then split them into a
# calendar date and an "HH:MM" clock time, keeping only the columns used
# downstream.
# The time zone is pinned to UTC because as.Date() on a POSIXct takes the
# date in UTC by default; parsing in the session's local zone instead could
# push messages sent near midnight onto the neighbouring day.
communications_tidy <- communications %>%
    mutate(
        Timestamp = as.POSIXct(Timestamp, format = "%m/%d/%Y %H:%M", tz = "UTC"),
        Date = as.Date(Timestamp),
        Time = format(Timestamp, "%H:%M")
    ) %>%
    select(Sender, Recipient, Date, Time, Content, Subject)

# Flag every message whose body mentions one of the watch-list keywords
# (case-insensitive match anywhere in Content)
communications_features <- mutate(
    communications_tidy,
    is_suspicious = grepl(
        pattern = "urgent|confidential|insider",
        x = Content,
        ignore.case = TRUE
    )
)

# Collapse to one row per calendar day: how many messages were sent and
# whether at least one of them was flagged as suspicious
suspicious_days <- summarise(
    group_by(communications_features, Date),
    total_messages = n(),
    any_suspicious = any(is_suspicious)
)

# Placeholder 'trades' table marking trade activity per day; swap in the
# real trade records when they are available.
# The dates are written month/day/year, so the format must be given
# explicitly: without it as.Date() falls back to "%Y/%m/%d" and reads
# "3/8/2024" as a date in year 3, which can never match the communication
# dates in the join.
trades <- tibble(
    Date = as.Date(c("3/8/2024", "3/9/2024", "3/10/2024"), format = "%m/%d/%Y"),
    trade_activity = c("Normal", "Suspicious", "Suspicious")
)

# Combine with the communication data.
# Days that have a trade record keep the label from 'trades'; days without
# one fall back to a label derived from the keyword flag, so every row ends
# up with a non-missing trade_activity.
combined_data <- left_join(suspicious_days, trades, by = "Date") %>%
    mutate(
        trade_activity = coalesce(
            trade_activity,
            if_else(any_suspicious, "Suspicious", "Normal")
        )
    )

# Plotting the final visualization: one bar per day for message volume,
# with a point at the top of each bar coloured by that day's trade-activity
# label.
final_plot <- ggplot(combined_data, aes(x = Date, y = total_messages)) +
    geom_col() +
    geom_point(aes(color = trade_activity), size = 4) +
    scale_color_manual(values = c("Normal" = "grey", "Suspicious" = "red")) +
    theme_minimal() +
    labs(
        title = "Communication Timeline with Suspicious Trades",
        x = "Date",
        y = "Total Messages",
        color = "Trade Activity"
    ) +
    # One axis break per message. The extra 0 keeps max() finite, and the
    # break sequence valid, when combined_data has no rows.
    scale_y_continuous(
        breaks = seq(0, max(c(0, combined_data$total_messages), na.rm = TRUE), by = 1)
    ) +
    expand_limits(y = 0)

# Print the plot
print(final_plot)
```
Remember to replace "path_to_file/communications.csv" with the actual path to the communications data file. After running this script, you should see a visual representation of communication frequencies by date, with suspicious activities highlighted.