Python

Working with CSV Financial Data in Python

Master reading, writing, and analysing CSV financial data with Python and pandas. Clean messy data and generate insights.

Getting Started

import pandas as pd

# Read a CSV file
df = pd.read_csv("stock_data.csv", parse_dates=["Date"], index_col="Date")
print(df.head())
print(f"Shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print(f"Date range: {df.index.min()} to {df.index.max()}")

# Basic statistics
print(df.describe())

Core Implementation

# Clean messy financial data
df = pd.read_csv("messy_data.csv")

# Remove currency symbols and convert to float
df["Price"] = df["Price"].str.replace("£", "").str.replace(",", "").astype(float)
df["Volume"] = df["Volume"].str.replace(",", "").astype(int)

# Handle missing values
print(f"Missing values:\n{df.isnull().sum()}")
df["Price"] = df["Price"].fillna(method="ffill")  # Forward fill prices
df = df.dropna(subset=["Volume"])  # Drop rows with no volume

# Remove duplicates
df = df.drop_duplicates(subset=["Date"])

# Sort by date
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date")

print(f"Cleaned data: {len(df)} rows")

Extended Example

# Export processed data
df.to_csv("cleaned_data.csv", index=False)

# Export to Excel with formatting
with pd.ExcelWriter("financial_report.xlsx") as writer:
    df.to_excel(writer, sheet_name="Raw Data", index=False)

    # Summary sheet
    summary = df.describe()
    summary.to_excel(writer, sheet_name="Summary")

    # Monthly aggregation
    monthly = df.set_index("Date").resample("M").agg({
        "Price": ["mean", "min", "max"],
        "Volume": "sum"
    })
    monthly.to_excel(writer, sheet_name="Monthly")

print("Report saved to financial_report.xlsx")