Source code for time_series_transform.stock_transform.stock_extractor
import threading
import numpy as np
import pandas as pd
from time_series_transform.stock_transform.base import *
from time_series_transform.stock_transform.stock_engine._investing import investing
from time_series_transform.stock_transform.stock_engine._yahoo_stock import yahoo_stock
from datetime import date, timedelta
[docs]class Stock_Extractor(object):
def __init__(self,symbol,engine, *args, **kwargs):
"""
Stock_Extractor extracts data of the given symbol
using the selected engine
For investing engine: country is required.
for example, Stock_Extractor('aapl','investing', country = 'united states')
Parameters
----------
symbol : str
symbol of the stock
engine : str
engine used for data extraction
"""
self.client = self._get_extractor(engine)(symbol, *args, **kwargs)
self.symbol = symbol
self.stock = None
def _get_extractor(self,engine):
engineDict = {
'yahoo': yahoo_stock,
'investing': investing
}
return engineDict[engine]
[docs] def get_period(self,period):
"""
get_period extracts the stock data of the selected
period
Parameters
----------
period : str
period of the data
for example, 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
Returns
-------
stock data
The stock data of selected period
"""
data = self.client.getHistoricalByPeriod(period)
data = pd.DataFrame(data.to_records())
data['Date'] = data.Date.astype(str)
additionalInfo = self.client.getAdditionalInfo()
self.stock = Stock(
data,
time_index='Date',
symbol=self.symbol
)
return self.stock
[docs] def get_date(self,start_date,end_date):
"""
get_period extracts the stock data of the selected
period
Parameters
----------
start_date : str
start of the data
format: "%Y-%m-%d", eg "2020-02-20"
end_date : str
end of the data
Returns
-------
stock data
The stock data of selected period
"""
data = self.client.getHistoricalByRange(start_date,end_date)
data = pd.DataFrame(data.to_records())
data['Date'] = data.Date.astype(str)
additionalInfo = self.client.getAdditionalInfo()
self.stock = Stock(
data,
time_index='Date',
symbol = self.symbol
)
return self.stock
[docs] def get_intra_day(self,start_date,end_date,interval = '1m'):
"""
get_intra_day extracts the intraday stock data of the selected
period
Parameters
----------
start_date : str
start of the data
format: "%Y-%m-%d", eg "2020-02-20"
end_date : str
end of the data
interval : str
interval of the data
Valid intervals: [1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h]
Returns
-------
stock data
The stock data of selected period
"""
data = self.client.getIntraDayData(start_date,end_date,interval)
data = pd.DataFrame(data.to_records())
data['Datetime'] = data.Datetime.astype(str)
self.stock= Stock(
data,
time_index = 'Datetime',
symbol = self.symbol
)
return self.stock
[docs]class Portfolio_Extractor(object):
def __init__(self,symbolList,engine, *args, **kwargs):
"""
Portfolio_Extractor extracts data of the given symbolList
using the selected engine
Parameters
----------
symbolList : list
list of symbol
engine : str
engine used for data extraction
"""
self.engine = engine
self.symbolList = symbolList
self.portfolio = None
self.args = args
self.kwargs = kwargs
[docs] def get_period(self,period, n_threads= 8):
"""
get_period extracts the list of stock
by the given period
Parameters
----------
period : str
period of the data
for example, 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
n_threads : int
number of thread of multi-thread processing
Returns
-------
portfolio
portfolio data of the given stock list
"""
stockList = self._get_stock_list_multi(n_threads,'get_period', [period])
self.portfolio = Portfolio(
stockList,
time_index='Date',
symbolIx='symbol'
)
return self.portfolio
[docs] def get_date(self,start_date, end_date, n_threads = 8):
"""
get_portfolio_date extracts the list of stock
by the date period
Parameters
----------
start_date : str
start of the data
format: "%Y-%m-%d", eg "2020-02-20"
end_date : str
end of the data
n_threads : int
number of thread of multi-thread processing
Returns
-------
portfolio
portfolio data of the given stock list
"""
stockList = self._get_stock_list_multi(n_threads,'get_date', [start_date, end_date])
self.portfolio = Portfolio(
stockList,
time_index='Date',
symbolIx='symbol'
)
return self.portfolio
[docs] def get_intra_day(self,start_date, end_date, interval = '1m', n_threads = 8):
"""
get_intra_day extracts the intraday data of the list of stock data
by the date period
Parameters
----------
start_date : str
start of the data
format: "%Y-%m-%d", eg "2020-02-20"
end_date : str
end of the data
interval : str
interval of the data
Valid intervals: [1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h]
n_threads : int
number of thread of multi-thread processing
Returns
-------
portfolio
portfolio data of the given stock list
"""
stockList = self._get_stock_list_multi(n_threads,'get_intra_day', [start_date, end_date, interval])
self.portfolio = Portfolio(
stockList,
time_index='Datetime',
symbolIx='symbol'
)
return self.portfolio
def _get_stock_list_multi(self, n_threads, func, time_val):
stockList = []
tasks = []
if len(self.symbolList) < n_threads:
n_threads = len(self.symbolList)
bins = np.array_split(self.symbolList, n_threads)
for bn in bins:
thread = threading.Thread(target=self._get_stock_data, args= [stockList, bn, func, time_val])
tasks.append(thread)
thread.start()
for task in tasks:
task.join()
stockDict = {}
for i in stockList:
stockDict.update(i)
return stockDict
def _get_stock_data(self, stockList, symbolList, func, time_val, *args, **kwargs):
for i in range(len(symbolList)):
symbol = symbolList[i]
if self.engine == "investing":
if 'country' not in self.kwargs:
raise ValueError("Country must be included while using the investing engine")
country = self.kwargs['country'][i]
stock_data = Stock_Extractor(symbol, self.engine, *self.args, country = country)
else:
stock_data = Stock_Extractor(symbol, self.engine, *self.args, **self.kwargs)
extract_func = getattr(stock_data,func)
stock_data = extract_func(*time_val)
stockList.append({symbol:stock_data})
return stockList