Spaces:
Running
Running
Robert Castagna
committed on
Commit
·
1fce135
1
Parent(s):
70fcc09
adding postgres db as well as fixing API endpoints -- 'yfinance' broke
Browse files- .gitignore +2 -1
- packages.txt +1 -0
- pages/1_Fundamentals.py +153 -52
- pages/2_Portfolio_Builder.py +1 -1
- requirements.txt +4 -2
- test.py +12 -0
.gitignore
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
secrets.json
|
| 2 |
edgar-crawler/
|
| 3 |
.venv/
|
| 4 |
-
.env
|
|
|
|
|
|
| 1 |
secrets.json
|
| 2 |
edgar-crawler/
|
| 3 |
.venv/
|
| 4 |
+
.env
|
| 5 |
+
.streamlit/
|
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
libpq-dev
|
pages/1_Fundamentals.py
CHANGED
|
@@ -6,8 +6,9 @@ import streamlit as st
|
|
| 6 |
import yfinance as yf
|
| 7 |
from datetime import timedelta
|
| 8 |
from dotenv import load_dotenv
|
|
|
|
| 9 |
|
| 10 |
-
load_dotenv()
|
| 11 |
|
| 12 |
def get_finnhub_data(example: str) -> json:
|
| 13 |
"""
|
|
@@ -96,8 +97,72 @@ def get_list_of_tickers():
|
|
| 96 |
list_of_tickers.append(comp_info[i]['symbol'])
|
| 97 |
return list_of_tickers
|
| 98 |
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
|
|
|
| 101 |
if 'tickers' not in st.session_state:
|
| 102 |
tickers = [
|
| 103 |
"AAPL", "MSFT", "GOOG", "NVDA", "TSLA",
|
|
@@ -115,50 +180,85 @@ with st.form(key="selecting columns"):
|
|
| 115 |
st.session_state['tickers'] = symbols
|
| 116 |
|
| 117 |
if submit_button and symbols and strategy_selection == 'Value':
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
|
|
|
| 137 |
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
st.session_state['tickers'] = symbols
|
| 162 |
|
| 163 |
if submit_button and symbols and strategy_selection == 'Growth':
|
| 164 |
|
|
@@ -168,6 +268,7 @@ with st.form(key="selecting columns"):
|
|
| 168 |
basic_info = get_industry(ticker)
|
| 169 |
metric_data, annual_series_data, quarterly_series_data = get_company_metrics(ticker)
|
| 170 |
|
|
|
|
| 171 |
# reformat all JSON returns to be flattened dictionaries
|
| 172 |
roe_dict = {'roe': annual_series_data['roe'][0]['v'] if ('roe' in annual_series_data) and (len(annual_series_data['roe'])>1) and ('v' in annual_series_data['roe'][0]) else 0}
|
| 173 |
ev_dict = {'ev' :annual_series_data['ev'][0]['v'] if ('ev' in annual_series_data) and (len(annual_series_data['ev'])>1) and ('v' in annual_series_data['ev'][0]) else 0}
|
|
@@ -184,22 +285,22 @@ with st.form(key="selecting columns"):
|
|
| 184 |
hash_map[ticker] = combined_info
|
| 185 |
|
| 186 |
# equity gains
|
| 187 |
-
_, div, close_price = get_equity_gains(ticker=ticker, period=1810)
|
| 188 |
-
gains_data[ticker] = [div, close_price]
|
| 189 |
|
| 190 |
|
| 191 |
# Now, create a DataFrame from the hash_map
|
| 192 |
df_1 = pd.DataFrame.from_dict(hash_map, orient='index')[['finnhubIndustry','roe','marketCapitalization','ebitdPerShareAnnual','pe','ps','pb','salesPerShare','eps','epsGrowth5Y','ev','operatingMarginAnnual', 'ebitdPerShareTTM', 'ebitdaCagr5Y', 'ebitdaInterimCagr5Y']]
|
| 193 |
-
df_2 = pd.DataFrame.from_dict(gains_data, orient='index', columns=['Recent Dividend','Price'])
|
| 194 |
-
df_final = df_1.join(df_2)
|
| 195 |
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
st.write(
|
| 199 |
|
| 200 |
st.session_state['tickers'] = symbols
|
| 201 |
|
| 202 |
if submit_button and symbols and strategy_selection == 'Bypass':
|
| 203 |
-
st.write('You have selected to bypass the metrics calculation for the selected tickers.')
|
| 204 |
st.session_state['tickers'] = symbols
|
| 205 |
|
|
|
|
| 6 |
import yfinance as yf
|
| 7 |
from datetime import timedelta
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
+
import psycopg2
|
| 10 |
|
| 11 |
+
load_dotenv(override=True)
|
| 12 |
|
| 13 |
def get_finnhub_data(example: str) -> json:
|
| 14 |
"""
|
|
|
|
| 97 |
list_of_tickers.append(comp_info[i]['symbol'])
|
| 98 |
return list_of_tickers
|
| 99 |
|
| 100 |
+
def handle_json_data(data, ticker) -> tuple[list[pd.DataFrame], int]:
    """
    Convert one API JSON payload (a dict) into DataFrames ready for ``pd.concat``.

    Each value of ``data`` is classified on its own:

    * Time series -- a non-empty list/tuple whose first element is a dict.
      It becomes a frame whose ``'v'`` column is renamed to the dict key and
      whose index is ``('period', ticker)``. These frames are meant to be
      joined side by side, so ``axis`` is set to 1.
    * Flat record -- any other value that carries data (non-empty string or
      container, or any number/bool). The *whole* payload is turned into a
      single one-row frame indexed by ``ticker``, ``axis`` is set to 0 and
      scanning stops.
    * Empty value -- ``None`` or an empty string/container. It is reported on
      stdout and skipped.

    Args:
        data: Mapping of field name to value, as returned by the API helpers.
        ticker: Symbol used to label the rows of every produced frame.

    Returns:
        ``(frames, axis)`` where ``axis`` is the value to pass to
        ``pd.concat``. ``frames`` is empty and ``axis`` is 0 when the payload
        holds nothing usable.

    Raises:
        KeyError: If a time-series entry has no ``'period'`` field.
    """
    df_list = []
    # Default so the return below never reads an unbound name when the
    # payload is empty or every value in it is empty.
    axis = 0

    for key, val in data.items():
        # Only real sequences are probed with [0]; a dict value would raise
        # KeyError and a number has no length at all.
        is_time_series = (
            isinstance(val, (list, tuple))
            and len(val) > 0
            and isinstance(val[0], dict)
        )

        if is_time_series:
            temporal_df = pd.DataFrame(val)
            temporal_df = temporal_df.rename({'v': key}, axis=1)
            temporal_df['ticker'] = ticker
            axis = 1

            if not temporal_df.empty:
                temporal_df = temporal_df.set_index(['period', 'ticker'])
                df_list.append(temporal_df)

        elif val is None or (hasattr(val, '__len__') and len(val) == 0):
            print("dict key has no data.")

        else:
            # Flat record: one row for the ticker, one column per field.
            non_temporal_df = pd.DataFrame(list(data.items()), columns=['ticker', ticker])
            non_temporal_df = non_temporal_df.set_index('ticker').T
            df_list.append(non_temporal_df)
            axis = 0
            # The frame already holds every field, so stop scanning.
            break

    return df_list, axis
|
| 129 |
+
|
| 130 |
+
def insert_dataframe(cursor, table_name, df):
    """
    Insert every row of a DataFrame into ``table_name``, skipping duplicates.

    The statement uses a target-less ``ON CONFLICT DO NOTHING``, so a row that
    collides with any primary-key or unique constraint of the table is
    silently skipped. This needs no catalog lookup and stays valid SQL for
    tables with zero, one or several such constraints.

    Args:
        cursor: DB-API cursor; only ``executemany`` is called on it.
        table_name: Target table. It is interpolated into the SQL text as an
            identifier, so it must come from trusted code, never user input.
        df: Frame whose column names match the target table's columns. The
            index is not written.

    Returns:
        None. Nothing is sent to the database when ``df`` has no rows.
    """
    # Column names may be non-string labels; str() keeps join() from failing.
    columns_str = ', '.join(map(str, df.columns))
    placeholders = ', '.join(['%s'] * len(df.columns))

    insert_query = (
        f"INSERT INTO {table_name} ({columns_str}) "
        f"VALUES ({placeholders}) ON CONFLICT DO NOTHING"
    )

    # Plain tuples, one per row, in column order.
    rows = list(df.itertuples(index=False, name=None))
    if not rows:
        return

    # Values travel as bound parameters; only identifiers are in the SQL text.
    cursor.executemany(insert_query, rows)
    print('df inserted.')
|
| 164 |
|
| 165 |
+
# ---------------------------------------------------------------------------------------------- #
|
| 166 |
if 'tickers' not in st.session_state:
|
| 167 |
tickers = [
|
| 168 |
"AAPL", "MSFT", "GOOG", "NVDA", "TSLA",
|
|
|
|
| 180 |
st.session_state['tickers'] = symbols
|
| 181 |
|
| 182 |
if submit_button and symbols and strategy_selection == 'Value':
|
| 183 |
+
|
| 184 |
+
# Mapping variables to target tables. keys are dataframe names, values are (write mode, db table)
|
| 185 |
+
table_mapping = {
|
| 186 |
+
'quarterly_series_data': ('APPEND', 'quarterly_financials'),
|
| 187 |
+
'basic_info': ('OVERWRITE', 'companies')
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
+
dbparams = {
|
| 191 |
+
'dbname':"hf_fundamentals",
|
| 192 |
+
'user':"postgres",
|
| 193 |
+
'password':os.environ['postgres_pwd'],
|
| 194 |
+
'host':"localhost",
|
| 195 |
+
'port':"5432"
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
try:
|
| 199 |
+
with psycopg2.connect(**dbparams) as conn:
|
| 200 |
+
conn.autocommit=True
|
| 201 |
+
|
| 202 |
+
with conn.cursor() as cur:
|
| 203 |
|
| 204 |
+
query_companies = "SELECT * FROM public.quarterly_financials where ticker = %s;" # works because of foreign key relation
|
| 205 |
+
|
| 206 |
+
all_data = {}
|
| 207 |
+
|
| 208 |
+
for ticker in symbols:
|
| 209 |
+
# first, check if the ticker has a record in the database
|
| 210 |
+
cur.execute(query_companies, (ticker,))
|
| 211 |
+
results = cur.fetchall()
|
| 212 |
+
|
| 213 |
+
if len(results) == 0: # if not in database, call api's and store in db
|
| 214 |
+
print("no data found in db")
|
| 215 |
+
# make all the API calls and capture return json
|
| 216 |
+
basic_info = get_industry(ticker)
|
| 217 |
+
metric_data, annual_series_data, quarterly_series_data = get_company_metrics(ticker)
|
| 218 |
+
|
| 219 |
+
api_data_mapping = {'basic_info':basic_info,
|
| 220 |
+
# 'metric_data':metric_data,
|
| 221 |
+
#'annual_series_data':annual_series_data,
|
| 222 |
+
'quarterly_series_data':quarterly_series_data}
|
| 223 |
+
|
| 224 |
+
for df_name, df in api_data_mapping.items():
|
| 225 |
+
df_main_list = []
|
| 226 |
+
|
| 227 |
+
# creates a list of dataframes where each index corresponds to a ticker
|
| 228 |
+
df_list, axis = handle_json_data(df, ticker)
|
| 229 |
+
df_main = pd.concat(df_list, axis=axis)
|
| 230 |
+
df_main_list.append(df_main)
|
| 231 |
+
|
| 232 |
+
# craft a dict where key is data source and values are list of dataframes for every ticker
|
| 233 |
+
if df_name not in all_data:
|
| 234 |
+
all_data[df_name] = df_main_list
|
| 235 |
+
else:
|
| 236 |
+
print('redundant.')
|
| 237 |
+
|
| 238 |
+
else:
|
| 239 |
+
print("retrieved from db.")
|
| 240 |
+
st.write(results[0])
|
| 241 |
|
| 242 |
+
for dataframe_name, data in all_data.items():
|
| 243 |
+
|
| 244 |
+
write_mode, target_table = table_mapping[dataframe_name]
|
| 245 |
+
print(write_mode, dataframe_name, target_table)
|
| 246 |
+
|
| 247 |
+
# combines all tickers into one dataframe for a given data source
|
| 248 |
+
df_all_tickers = pd.concat(data)
|
| 249 |
+
|
| 250 |
+
print(ticker)
|
| 251 |
+
unstacked_df = df_all_tickers.reset_index()
|
| 252 |
+
|
| 253 |
+
if 'index' in unstacked_df.columns: # hack fix for single index dataframes ..
|
| 254 |
+
unstacked_df=unstacked_df.drop('ticker', axis=1)
|
| 255 |
+
unstacked_df.rename(columns={'index':'ticker'}, inplace = True)
|
| 256 |
+
|
| 257 |
+
insert_dataframe(cur, target_table, unstacked_df)
|
| 258 |
+
|
| 259 |
+
st.session_state['tickers'] = symbols
|
| 260 |
+
except:
|
| 261 |
+
print("You do not have an active Postgres instance running. Select 'Bypass' and continue to Portfolio Builder.")
|
|
|
|
|
|
|
| 262 |
|
| 263 |
if submit_button and symbols and strategy_selection == 'Growth':
|
| 264 |
|
|
|
|
| 268 |
basic_info = get_industry(ticker)
|
| 269 |
metric_data, annual_series_data, quarterly_series_data = get_company_metrics(ticker)
|
| 270 |
|
| 271 |
+
|
| 272 |
# reformat all JSON returns to be flattened dictionaries
|
| 273 |
roe_dict = {'roe': annual_series_data['roe'][0]['v'] if ('roe' in annual_series_data) and (len(annual_series_data['roe'])>1) and ('v' in annual_series_data['roe'][0]) else 0}
|
| 274 |
ev_dict = {'ev' :annual_series_data['ev'][0]['v'] if ('ev' in annual_series_data) and (len(annual_series_data['ev'])>1) and ('v' in annual_series_data['ev'][0]) else 0}
|
|
|
|
| 285 |
hash_map[ticker] = combined_info
|
| 286 |
|
| 287 |
# equity gains
|
| 288 |
+
# _, div, close_price = get_equity_gains(ticker=ticker, period=1810)
|
| 289 |
+
# gains_data[ticker] = [div, close_price]
|
| 290 |
|
| 291 |
|
| 292 |
# Now, create a DataFrame from the hash_map
|
| 293 |
df_1 = pd.DataFrame.from_dict(hash_map, orient='index')[['finnhubIndustry','roe','marketCapitalization','ebitdPerShareAnnual','pe','ps','pb','salesPerShare','eps','epsGrowth5Y','ev','operatingMarginAnnual', 'ebitdPerShareTTM', 'ebitdaCagr5Y', 'ebitdaInterimCagr5Y']]
|
| 294 |
+
# df_2 = pd.DataFrame.from_dict(gains_data, orient='index', columns=['Recent Dividend','Price'])
|
| 295 |
+
# df_final = df_1.join(df_2)
|
| 296 |
|
| 297 |
+
df_1['PE/G'] = df_1['pe'] / df_1['epsGrowth5Y']
|
| 298 |
+
df_1.rename({'finnhubIndustry':'Industry','marketCapitalization':'MarketCap','roe':'ROE', 'ev':'Enterp. Val', 'pe':'P/E', 'ps':'P/S', 'pb':'P/B', 'eps': 'EPS'}, inplace=True, axis=1)
|
| 299 |
+
st.write(df_1)
|
| 300 |
|
| 301 |
st.session_state['tickers'] = symbols
|
| 302 |
|
| 303 |
if submit_button and symbols and strategy_selection == 'Bypass':
|
| 304 |
+
st.write('You have selected to bypass the metrics calculation for the selected tickers. Proceed to Portfolio Builder.')
|
| 305 |
st.session_state['tickers'] = symbols
|
| 306 |
|
pages/2_Portfolio_Builder.py
CHANGED
|
@@ -115,7 +115,7 @@ with st.form(key="selecting columns"):
|
|
| 115 |
obb
|
| 116 |
.equity
|
| 117 |
.price
|
| 118 |
-
.historical(tickers, start_date=start_date, end_date=end_date, provider="
|
| 119 |
.to_df()
|
| 120 |
.pivot(columns="symbol", values="close")
|
| 121 |
)
|
|
|
|
| 115 |
obb
|
| 116 |
.equity
|
| 117 |
.price
|
| 118 |
+
.historical(tickers, start_date=start_date, end_date=end_date, provider="fmp")
|
| 119 |
.to_df()
|
| 120 |
.pivot(columns="symbol", values="close")
|
| 121 |
)
|
requirements.txt
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
openbb==4.1.4
|
| 2 |
riskfolio-lib==5.0.1
|
| 3 |
transformers==4.36.2
|
|
@@ -8,7 +10,7 @@ beautifulsoup4==4.12.3
|
|
| 8 |
click==7.0
|
| 9 |
cssutils==1.0.2
|
| 10 |
numpy==1.24.4
|
| 11 |
-
lxml==4.9.1
|
| 12 |
requests>=2.31.0
|
| 13 |
urllib3>=1.26.7
|
| 14 |
pandas>=1.5.3
|
|
@@ -20,4 +22,4 @@ regex==2024.5.15
|
|
| 20 |
yfinance==0.2.28
|
| 21 |
python-dotenv==1.0.1
|
| 22 |
asyncio==3.4.3
|
| 23 |
-
nest_asyncio==1.6.0
|
|
|
|
| 1 |
+
sqlalchemy
|
| 2 |
+
psycopg2-binary
|
| 3 |
openbb==4.1.4
|
| 4 |
riskfolio-lib==5.0.1
|
| 5 |
transformers==4.36.2
|
|
|
|
| 10 |
click==7.0
|
| 11 |
cssutils==1.0.2
|
| 12 |
numpy==1.24.4
|
| 13 |
+
#lxml==4.9.1
|
| 14 |
requests>=2.31.0
|
| 15 |
urllib3>=1.26.7
|
| 16 |
pandas>=1.5.3
|
|
|
|
| 22 |
yfinance==0.2.28
|
| 23 |
python-dotenv==1.0.1
|
| 24 |
asyncio==3.4.3
|
| 25 |
+
nest_asyncio==1.6.0
|
test.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from contextlib import closing

import psycopg2

# Smoke test: connect to the local Postgres instance and list its tables.
# The password is read from the `postgres_pwd` environment variable so that
# no credential is committed to the repository; export it before running.
# closing() is used because psycopg2's own `with connection` block only ends
# the transaction and leaves the connection open.
with closing(
    psycopg2.connect(
        database="hf_fundamentals",
        user="postgres",
        password=os.environ["postgres_pwd"],
        host="localhost",
        port=5432,
    )
) as connection:
    with connection.cursor() as cursor:
        cursor.execute("SELECT * FROM pg_catalog.pg_tables;")

        # Fetch all rows from database
        record = cursor.fetchall()

print("Data from Database:- ", record)
|