import numpy as np
from statsmodels import regression
import matplotlib.pyplot as plt
import pandas as pd

import numpy as np
from zipline.pipeline import Pipeline, sharadar, EquityPricing
from zipline.pipeline.factors import Returns
        
def make_pipeline():
    """
    Build the pipeline that supplies the raw inputs for the SMB and HML factors.

    Kept as a standalone function so it can be tested, tweaked, or pasted into
    a research notebook on its own.

    Output columns, in order: market_cap, book_to_price, returns,
    market_cap_rank, book_to_price_rank, biggest, smallest, highpb, lowpb.
    Only assets that satisfy at least one of the four boolean membership
    filters pass the screen.
    """
    fundamentals = sharadar.Fundamentals.slice(dimension='ARQ', period_offset=0)

    # Raw factor inputs: market cap, and book-to-price as the inverse of PB.
    market_cap = fundamentals.MARKETCAP.latest
    book_to_price = 1/fundamentals.PB.latest

    # Return over a two-bar window of closing prices.
    daily_returns = Returns(inputs=[EquityPricing.close], window_length=2)

    # Ranks of each factor; portfolio membership below is derived from them.
    market_cap_rank = market_cap.rank()
    book_to_price_rank = book_to_price.rank()

    # Membership filters: the 1000 highest- and lowest-ranked names per factor.
    biggest = market_cap_rank.top(1000)
    smallest = market_cap_rank.bottom(1000)
    highpb = book_to_price_rank.top(1000)
    lowpb = book_to_price_rank.bottom(1000)

    return Pipeline(
        columns={
            "market_cap": market_cap,
            "book_to_price": book_to_price,
            "returns": daily_returns,
            'market_cap_rank': market_cap_rank,
            'book_to_price_rank': book_to_price_rank,
            'biggest': biggest,
            'smallest': smallest,
            'highpb': highpb,
            'lowpb': lowpb,
        },
        # Anything outside the four portfolios is not needed downstream.
        screen=biggest | smallest | highpb | lowpb,
    )

pipe = make_pipeline()

from zipline.research import run_pipeline

start_date = '2014-01-01'
end_date = '2015-01-01'

# Evaluate the pipeline over the sample window against the sharadar-1d bundle.
results = run_pipeline(pipe, start_date=start_date, end_date=end_date, bundle='sharadar-1d')

# For each portfolio, average its members' returns within each group of the
# first index level of the pipeline output.
pipeline_returns = results['returns']
R_biggest = pipeline_returns[results.biggest].groupby(level=0).mean()
R_smallest = pipeline_returns[results.smallest].groupby(level=0).mean()

R_highpb = pipeline_returns[results.highpb].groupby(level=0).mean()
R_lowpb = pipeline_returns[results.lowpb].groupby(level=0).mean()

# Long-short factor returns: small minus big, and high minus low book-to-price.
SMB = R_smallest - R_biggest
HML = R_highpb - R_lowpb

# Compound the per-period factor returns into cumulative growth paths.
SMB_CUM = (SMB + 1).cumprod()
HML_CUM = (HML + 1).cumprod()

plt.plot(SMB_CUM.index, SMB_CUM.values, label='SMB Portfolio Returns')
plt.plot(HML_CUM.index, HML_CUM.values, label='HML Portfolio Returns')
plt.ylabel('Cumulative Return')
plt.legend();

from quantrocket.master import get_securities
from quantrocket import get_prices

securities = get_securities(symbols=['MSFT', 'AAPL', 'YHOO', 'FB', 'TSLA'], vendors='sharadar')

# Close-to-close percentage returns for each security in the portfolio.
# The first row is dropped because pct_change() has no prior close for it.
portfolio = get_prices(
    'sharadar-1d',
    data_frequency='daily',
    sids=securities.index.tolist(),
    fields='Close',
    start_date=start_date,
    end_date=end_date).loc['Close'].pct_change().iloc[1:]

# Equal-weighted portfolio return: the cross-sectional mean for each row.
R = portfolio.mean(axis=1)

SPY = get_securities(symbols='SPY', vendors='usstock').index[0]

# Benchmark returns, computed the same way as the portfolio returns.
bench = get_prices(
    'sharadar-1d',
    data_frequency='daily',
    sids=SPY,
    fields='Close',
    start_date=start_date,
    end_date=end_date).loc['Close'][SPY].pct_change().iloc[1:]

# Active return: the equal-weighted portfolio's return in excess of the benchmark
active = R - bench

# Column of ones so the regression can estimate an intercept
constant = pd.Series(np.ones(len(active.index)), index=active.index)

# Align the active returns with the factor returns on a shared index and keep
# only the rows where every series has a value.
df = pd.DataFrame({'R': active,
              'F1': SMB,
              'F2': HML,
              'Constant': constant})
df = df.dropna()

# Perform linear regression to get the coefficients in the model.
# statsmodels' OLS does not add an intercept by itself, so the Constant column
# has to be part of the regressors; otherwise the fit is forced through the
# origin and the factor sensitivities absorb the mean active return.
params = regression.linear_model.OLS(df['R'], df[['Constant', 'F1', 'F2']]).fit().params
# Select the factor sensitivities by label rather than by position.
b1, b2 = params['F1'], params['F2']

# Print the coefficients from the linear regression
print('Sensitivities of active returns to factors:\nSMB: %f\nHML: %f' %  (b1, b2))

Sensitivities of active returns to factors:
SMB: -0.078169
HML: -0.133842

# Factor return series on the same aligned sample the regression was fit on.
F1 = df['F1']
F2 = df['F2']

# 2x2 sample covariance matrix of the two factors:
# cov[0,0] = Var(F1), cov[1,1] = Var(F2), cov[0,1] = Cov(F1, F2).
cov = np.cov(F1, F2)

# Variance of active returns, taken from df['R'] so that it covers exactly the
# same rows as the betas and the factor covariance. The raw `active` series
# can include rows that dropna() removed from df.
ar_squared = df['R'].var()

# Share of active variance attributed to each factor:
#   b_j * (b_1 * Cov(F_j, F1) + b_2 * Cov(F_j, F2)) / Var(active)
fmcar1 = (b1*(b2*cov[0,1] + b1*cov[0,0]))/ar_squared
fmcar2 = (b2*(b1*cov[0,1] + b2*cov[1,1]))/ar_squared
print('SMB Risk Contribution:', fmcar1)
print('HML Risk Contribution:', fmcar2)

SMB Risk Contribution: 0.003525912039738247
HML Risk Contribution: 0.006301770441936193

Factor Risk Exposure

DISCLAIMER

Using Factor Models to Determine Risk Exposure

How many factors do you want?

Setup

Computing Risk Exposure

Factor and tracking portfolios

Portfolio Exposure

Example

How to Use Risk Exposure Models

Risk Management

Hedging