Spaces:
Running
Running
Update advanced_scraper_ui.py and app.py with latest improvements
Browse files
- advanced_scraper_ui.py +6 -6
- app.py +4 -0
advanced_scraper_ui.py
CHANGED
@@ -13,6 +13,9 @@ from datetime import datetime
|
|
13 |
from dotenv import load_dotenv
|
14 |
from enhanced_scraper import EnhancedRedditScraper
|
15 |
|
|
|
|
|
|
|
16 |
# Note: Page configuration and session state initialization are handled in app.py
|
17 |
|
18 |
# Functions
|
@@ -158,7 +161,6 @@ def create_data_visualization(results):
|
|
158 |
# Score Distribution
|
159 |
with viz_tab1:
|
160 |
try:
|
161 |
-
st.subheader("Score Distribution")
|
162 |
fig = px.histogram(df, x="score", color="subreddit", nbins=20,
|
163 |
title="Distribution of Post Scores")
|
164 |
fig.update_layout(
|
@@ -182,7 +184,6 @@ def create_data_visualization(results):
|
|
182 |
# Posts by Subreddit
|
183 |
with viz_tab2:
|
184 |
try:
|
185 |
-
st.subheader("Posts by Subreddit")
|
186 |
subreddit_counts = df['subreddit'].value_counts().reset_index()
|
187 |
subreddit_counts.columns = ['subreddit', 'count']
|
188 |
|
@@ -209,7 +210,6 @@ def create_data_visualization(results):
|
|
209 |
# Time Analysis
|
210 |
with viz_tab3:
|
211 |
try:
|
212 |
-
st.subheader("Time Analysis")
|
213 |
if 'created_utc' in df.columns:
|
214 |
try:
|
215 |
# Handle different date formats
|
@@ -268,9 +268,9 @@ def main():
|
|
268 |
'show_only_with_comments': False
|
269 |
}
|
270 |
|
271 |
-
# Header
|
272 |
-
st.
|
273 |
-
st.
|
274 |
|
275 |
# Sidebar for configuration
|
276 |
with st.sidebar:
|
|
|
13 |
from dotenv import load_dotenv
|
14 |
from enhanced_scraper import EnhancedRedditScraper
|
15 |
|
16 |
+
# Disable static file serving to prevent the static folder warning
|
17 |
+
os.environ['STREAMLIT_SERVER_ENABLE_STATIC_SERVING'] = 'false'
|
18 |
+
|
19 |
# Note: Page configuration and session state initialization are handled in app.py
|
20 |
|
21 |
# Functions
|
|
|
161 |
# Score Distribution
|
162 |
with viz_tab1:
|
163 |
try:
|
|
|
164 |
fig = px.histogram(df, x="score", color="subreddit", nbins=20,
|
165 |
title="Distribution of Post Scores")
|
166 |
fig.update_layout(
|
|
|
184 |
# Posts by Subreddit
|
185 |
with viz_tab2:
|
186 |
try:
|
|
|
187 |
subreddit_counts = df['subreddit'].value_counts().reset_index()
|
188 |
subreddit_counts.columns = ['subreddit', 'count']
|
189 |
|
|
|
210 |
# Time Analysis
|
211 |
with viz_tab3:
|
212 |
try:
|
|
|
213 |
if 'created_utc' in df.columns:
|
214 |
try:
|
215 |
# Handle different date formats
|
|
|
268 |
'show_only_with_comments': False
|
269 |
}
|
270 |
|
271 |
+
# Header using Streamlit's native heading components
|
272 |
+
st.title("Reddit Scraper")
|
273 |
+
st.header("Data Collection Tool")
|
274 |
|
275 |
# Sidebar for configuration
|
276 |
with st.sidebar:
|
app.py
CHANGED
@@ -10,6 +10,10 @@ st.set_page_config(
|
|
10 |
initial_sidebar_state="expanded"
|
11 |
)
|
12 |
|
|
|
|
|
|
|
|
|
13 |
# Session state initialization is now handled in advanced_scraper_ui.py
|
14 |
|
15 |
# Load environment variables
|
|
|
10 |
initial_sidebar_state="expanded"
|
11 |
)
|
12 |
|
13 |
+
# Disable static file serving to prevent the static folder warning
|
14 |
+
# This configuration is set using environment variables instead of directly accessing server settings
|
15 |
+
os.environ['STREAMLIT_SERVER_ENABLE_STATIC_SERVING'] = 'false'
|
16 |
+
|
17 |
# Session state initialization is now handled in advanced_scraper_ui.py
|
18 |
|
19 |
# Load environment variables
|