Commit 9bf19c4 · Parent: 795900a · Message: "ok"

Files changed:
- app.py +28 -30
- llm_recommendations.py +90 -67
- modules/backlinks.py +11 -16
- modules/content_audit.py +0 -11
- modules/keywords.py +3 -19
- modules/technical_seo.py +2 -14
- report_generator.py +99 -54
- simple_pdf_generator.py +16 -26
app.py
CHANGED
@@ -1,4 +1,4 @@
-
+
 from flask import Flask, render_template, request, jsonify, send_file, redirect, url_for
 import validators
 import os
@@ -7,7 +7,7 @@ import uuid
 from urllib.parse import urlparse
 from typing import Dict, Any

-
+
 from modules.technical_seo import TechnicalSEOModule
 from modules.content_audit import ContentAuditModule
 from modules.keywords import KeywordsModule
@@ -19,7 +19,7 @@ from llm_recommendations import LLMRecommendations
 app = Flask(__name__, static_folder='static')
 app.secret_key = 'seo_report_generator_2024'

-
+
 technical_module = TechnicalSEOModule()
 content_module = ContentAuditModule()
 keywords_module = KeywordsModule()
@@ -28,11 +28,10 @@ report_gen = ReportGenerator()
 pdf_gen = SimplePDFGenerator()
 llm_recommendations = LLMRecommendations()

-
+
 reports_store = {}

 def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
-"""Transform new keywords data structure to match report generator expectations"""
 if not new_data or new_data.get('placeholder'):
 return {
 'placeholder': True,
@@ -44,7 +43,7 @@ def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
 'data_source': 'Analysis failed'
 }

-
+
 totals = new_data.get('totals', {})
 distribution = new_data.get('distribution', {})
 movement = new_data.get('movement', {})
@@ -53,7 +52,7 @@
 opportunities = new_data.get('opportunities', [])
 data_sources = new_data.get('data_sources', {})

-
+
 pos_dist = {
 'top_3': distribution.get('top3', 0),
 'top_10': distribution.get('top10', 0),
@@ -61,27 +60,27 @@
 'beyond_50': totals.get('keywords', 0) - distribution.get('top50', 0)
 }

-
+
 transformed_best_keywords = []
 for kw in best_keywords:
 transformed_best_keywords.append({
 'keyword': kw.get('keyword', ''),
 'position': kw.get('rank', 0),
-'clicks': 0,
+'clicks': 0,
 'impressions': kw.get('volume', 0),
 'url': kw.get('url', ''),
 'estimated_traffic': kw.get('estimated_traffic', 0),
 'trend': kw.get('trend', 'stable')
 })

-
+
 transformed_opportunities = []
 for opp in opportunities:
 transformed_opportunities.append({
 'keyword': opp.get('keyword', ''),
-'position': 0,
+'position': 0,
 'impressions': opp.get('volume', 0),
-'ctr': 0,
+'ctr': 0,
 'competitor_rank': opp.get('competitor_rank', 0),
 'priority_score': opp.get('priority_score', 0),
 'competitor_domain': opp.get('competitor_domain', '')
@@ -119,30 +118,30 @@ def generate_report():
 if not validators.url(url):
 return jsonify({'error': 'Please enter a valid URL'}), 400

-
+
 report_id = str(uuid.uuid4())

-
+
 competitor_domains = []
 competitor_list = []
 for comp in competitors:
 comp = comp.strip()
 if comp and validators.url(comp):
 competitor_list.append(comp)
-
+
 domain = urlparse(comp).netloc.replace('www.', '')
 competitor_domains.append(domain)

-
+
 technical_data = technical_module.analyze(url)

-
+
 content_data = content_module.analyze(url)

-
+
 keywords_result = keywords_module.analyze(url, competitor_domains=competitor_domains)
 if not keywords_result.success:
-
+
 keywords_data = {
 'placeholder': True,
 'message': f'Keywords analysis failed: {keywords_result.error}',
@@ -153,10 +152,10 @@
 'data_source': 'Analysis failed'
 }
 else:
-
+
 keywords_data = _transform_keywords_data(keywords_result.data)

-
+
 print(f"DEBUG: Starting backlinks analysis for {url}")
 backlinks_result = backlinks_module.analyze(url)
 backlinks_data = backlinks_result.data
@@ -167,18 +166,18 @@
 if backlinks_data.get('placeholder'):
 print(f"DEBUG: Using placeholder data: {backlinks_data.get('message')}")

-
+
 llm_rec_data = llm_recommendations.generate_recommendations(
 url, technical_data, content_data, keywords_data, backlinks_data
 )

-
+
 competitor_data = []
 for comp_url in competitor_list:
 comp_technical = technical_module.analyze(comp_url)
 comp_content = content_module.analyze(comp_url, quick_scan=True)

-
+
 comp_keywords_result = keywords_module.analyze(comp_url, competitor_domains=[], quick_scan=True)
 if comp_keywords_result.success:
 comp_keywords = _transform_keywords_data(comp_keywords_result.data)
@@ -193,7 +192,7 @@
 'data_source': 'Analysis failed'
 }

-
+
 comp_backlinks_result = backlinks_module.analyze(comp_url, quick_scan=True)
 comp_backlinks = comp_backlinks_result.data

@@ -205,7 +204,7 @@
 'backlinks': comp_backlinks
 })

-
+
 report_html = report_gen.generate_html_report(
 url=url,
 technical_data=technical_data,
@@ -217,7 +216,7 @@
 include_charts=True
 )

-
+
 reports_store[report_id] = {
 'url': url,
 'html': report_html,
@@ -256,7 +255,6 @@ def download_html(report_id):

 report_data = reports_store[report_id]

-# Create temporary file
 with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as f:
 f.write(report_data['html'])
 temp_path = f.name
@@ -273,10 +271,10 @@ def download_pdf(report_id):
 try:
 report_data = reports_store[report_id]

-
+
 pdf_data = pdf_gen.generate_pdf(report_data['html'])

-
+
 with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:
 f.write(pdf_data)
 temp_path = f.name
llm_recommendations.py
CHANGED
@@ -1,7 +1,4 @@
-
-Groq LLM Integration for Smart SEO Recommendations
-Analyzes all 4 modules (Technical SEO, Content Audit, Keywords, Backlinks) to generate intelligent recommendations
-"""
+

 import os
 import json
@@ -9,7 +6,7 @@ from typing import Dict, Any, List
 from groq import Groq
 from dotenv import load_dotenv

-
+
 load_dotenv()


@@ -25,33 +22,20 @@ class LLMRecommendations:
 def generate_recommendations(self, url: str, technical_data: Dict[str, Any],
 content_data: Dict[str, Any], keywords_data: Dict[str, Any],
 backlinks_data: Dict[str, Any]) -> Dict[str, Any]:
-"""
-Generate comprehensive SEO recommendations based on all module data
-
-Args:
-url: Target website URL
-technical_data: Technical SEO analysis results
-content_data: Content audit results
-keywords_data: Keywords analysis results
-backlinks_data: Backlinks analysis results
-
-Returns:
-Dictionary with recommendations and insights
-"""
 if not self.available:
 return self._generate_fallback_recommendations(technical_data, content_data, keywords_data, backlinks_data)

 try:
-
+
 context = self._prepare_context(url, technical_data, content_data, keywords_data, backlinks_data)

-
+
 recommendations = self._query_llm(context)

 return {
-'
+'recommendations_markdown': recommendations,
 'executive_insights': self._generate_executive_insights(context),
-'priority_actions': self._extract_priority_actions(recommendations),
+'priority_actions': self._extract_priority_actions([recommendations]),
 'data_source': 'Groq LLM Analysis',
 'generated_at': context['analysis_date']
 }
@@ -61,9 +45,8 @@

 def _prepare_context(self, url: str, technical_data: Dict, content_data: Dict,
 keywords_data: Dict, backlinks_data: Dict) -> Dict[str, Any]:
-"""Prepare structured context for LLM analysis"""

-
+
 context = {
 'website': url,
 'analysis_date': technical_data.get('last_updated', ''),
@@ -101,7 +84,6 @@
 return context

 def _query_llm(self, context: Dict[str, Any]) -> List[str]:
-"""Query Groq LLM for SEO recommendations"""

 prompt = f"""
 You are an expert SEO consultant analyzing a comprehensive SEO audit for {context['website']}. Based on the data below, provide specific, actionable SEO recommendations.
@@ -143,12 +125,18 @@ CRITICAL INSTRUCTIONS:
 5. Prioritize recommendations by potential impact and ease of implementation
 6. Include technical optimizations, content improvements, keyword opportunities, and link building strategies
 7. Provide estimated timelines and resources needed for each recommendation
+8. IMPORTANT: Use ONLY plain text format with markdown syntax - NO tables, NO complex formatting, NO HTML
+9. Format your response as clean markdown that can be rendered properly

-Generate exactly 8-12 specific recommendations
-
+Generate exactly 8-12 specific recommendations using simple markdown format:
+## Priority: HIGH/MEDIUM/LOW
+**Action Title**
+Description with clear steps and expected impact.
+Timeline: X weeks

 Priority Levels: HIGH, MEDIUM, LOW
 Focus on actionable items that can be implemented within 30-90 days.
+Use simple markdown formatting only - headers, bold text, and bullet points.

 Response:
 """
@@ -158,35 +146,25 @@ Response:
 messages=[
 {'role': 'user', 'content': prompt}
 ],
-model="
+model="openai/gpt-oss-120b",
 stream=False,
-temperature=0.1,
+temperature=0.1,
 max_tokens=1500
 )

 response = chat_completion.choices[0].message.content.strip()

-
-
-
-for line in lines:
-line = line.strip()
-if line.startswith('- **') or line.startswith('•'):
-# Clean up the recommendation
-recommendation = line.replace('- **', '').replace('• **', '').strip()
-if recommendation:
-recommendations.append(recommendation)
-
-return recommendations if recommendations else [response]
+
+# Return the full markdown response instead of parsing individual recommendations
+return response

 except Exception as e:
 return [f"LLM Error: {str(e)}"]

 def _generate_executive_insights(self, context: Dict[str, Any]) -> List[str]:
-"""Generate high-level executive insights"""
 insights = []

-
+
 mobile_score = context['technical_seo']['mobile_score']
 desktop_score = context['technical_seo']['desktop_score']
 avg_score = (mobile_score + desktop_score) / 2
@@ -198,7 +176,7 @@
 else:
 insights.append(f"🟢 Good: Website performance is solid (avg: {avg_score:.0f}/100)")

-
+
 pages = context['content_audit']['pages_analyzed']
 if pages > 0:
 metadata = context['content_audit']['metadata_completeness']
@@ -209,7 +187,7 @@
 else:
 insights.append(f"🟢 Content Quality: Metadata completeness is good ({title_pct:.0f}%)")

-
+
 if context['keywords']['data_available']:
 total_keywords = context['keywords']['total_keywords']
 pos_dist = context['keywords']['position_distribution']
@@ -224,7 +202,7 @@
 else:
 insights.append("📊 Connect keyword tracking tools for visibility insights")

-
+
 if context['backlinks']['data_available']:
 ref_domains = context['backlinks']['total_ref_domains']
 domain_rating = context['backlinks']['domain_rating']
@@ -241,22 +219,65 @@
 return insights

 def _extract_priority_actions(self, recommendations: List[str]) -> List[Dict[str, str]]:
-"""Extract priority actions from recommendations"""
 priority_actions = []

-
-
-
-
-
-
-
-
-
-
-
+# Handle the case where recommendations is a single string (markdown)
+if isinstance(recommendations, list) and len(recommendations) == 1:
+markdown_text = recommendations[0]
+elif isinstance(recommendations, str):
+markdown_text = recommendations
+else:
+markdown_text = ""
+
+# Extract high priority actions from markdown
+if markdown_text:
+lines = markdown_text.split('\n')
+current_priority = None
+current_title = None
+current_description = []
+
+for line in lines:
+line = line.strip()
+if line.startswith('## Priority:'):
+# Save previous action if exists
+if current_title and current_priority == 'HIGH':
+priority_actions.append({
+'title': current_title,
+'description': ' '.join(current_description).strip(),
+'priority': 'HIGH'
+})
+
+# Start new action
+current_priority = line.replace('## Priority:', '').strip()
+current_title = None
+current_description = []
+elif line.startswith('**') and line.endswith('**'):
+current_title = line.replace('**', '').strip()
+elif line and not line.startswith('#'):
+current_description.append(line)
+
+# Save last action if exists
+if current_title and current_priority == 'HIGH':
+priority_actions.append({
+'title': current_title,
+'description': ' '.join(current_description).strip(),
+'priority': 'HIGH'
+})
+
+# Fallback for old format
+if not priority_actions and isinstance(recommendations, list):
+for rec in recommendations:
+if '**HIGH**' in rec or '**CRITICAL**' in rec:
+parts = rec.replace('**HIGH**', '').replace('**CRITICAL**', '').strip()
+if ':' in parts:
+title, description = parts.split(':', 1)
+priority_actions.append({
+'title': title.strip(),
+'description': description.strip(),
+'priority': 'HIGH'
+})

-
+
 if not priority_actions and recommendations:
 for i, rec in enumerate(recommendations[:3]):
 if ':' in rec:
@@ -267,15 +288,14 @@
 'priority': 'HIGH'
 })

-return priority_actions[:5]
+return priority_actions[:5]

 def _generate_fallback_recommendations(self, technical_data: Dict, content_data: Dict,
 keywords_data: Dict, backlinks_data: Dict, error: str = None) -> Dict[str, Any]:
-"""Generate basic recommendations when LLM is not available"""

 recommendations = []

-
+
 mobile_score = technical_data.get('mobile_score', 0)
 desktop_score = technical_data.get('desktop_score', 0)

@@ -284,7 +304,7 @@
 if desktop_score < 50:
 recommendations.append("**HIGH** Improve Desktop Performance: Optimize server response time, minimize CSS and JavaScript")

-
+
 pages = content_data.get('pages_analyzed', 0)
 if pages > 0:
 metadata = content_data.get('metadata_completeness', {})
@@ -294,7 +314,7 @@
 if content_data.get('avg_word_count', 0) < 300:
 recommendations.append("**MEDIUM** Enhance Content: Increase average page content length")

-
+
 if not keywords_data.get('placeholder', False):
 total_keywords = keywords_data.get('total_keywords', 0)
 pos_dist = keywords_data.get('position_distribution', {})
@@ -304,7 +324,7 @@
 else:
 recommendations.append("**MEDIUM** Set Up Keyword Tracking: Connect Google Search Console for keyword insights")

-
+
 if not backlinks_data.get('placeholder', False):
 ref_domains = backlinks_data.get('total_ref_domains', 0)
 if ref_domains < 50:
@@ -312,7 +332,7 @@
 else:
 recommendations.append("**MEDIUM** Set Up Backlink Monitoring: Add RapidAPI key for comprehensive link analysis")

-
+
 if not recommendations:
 recommendations = [
 "**HIGH** Audit Technical Issues: Review site speed and mobile performance",
@@ -329,8 +349,11 @@
 if error:
 insights.append(f"❌ LLM Error: {error}")

+# Convert recommendations list to markdown format
+markdown_recommendations = "\n".join([f"## Priority: HIGH\n**{rec.replace('**HIGH**', '').replace('**MEDIUM**', '').replace('**LOW**', '').strip()}**\n" for rec in recommendations])
+
 return {
-'
+'recommendations_markdown': markdown_recommendations,
 'executive_insights': insights,
 'priority_actions': [
 {
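To make the new recommendation contract concrete: _query_llm now returns a single markdown string in the format the prompt requests, generate_recommendations stores it under recommendations_markdown, and _extract_priority_actions walks that string line by line, keeping only the HIGH blocks. A minimal round-trip sketch with invented sample text — the wording is illustrative, not model output, and it assumes the class constructs without a Groq key (the fallback path handles that case):

from llm_recommendations import LLMRecommendations

sample_markdown = (
    "## Priority: HIGH\n"
    "**Compress Hero Images**\n"
    "Serve WebP and lazy-load below-the-fold media to improve LCP.\n"
    "Timeline: 2 weeks\n"
    "## Priority: MEDIUM\n"
    "**Expand Thin Pages**\n"
    "Grow key pages past 300 words.\n"
)

# generate_recommendations() passes [recommendations], so the single-item-list branch applies
actions = LLMRecommendations()._extract_priority_actions([sample_markdown])
# -> [{'title': 'Compress Hero Images',
#      'description': 'Serve WebP and lazy-load below-the-fold media to improve LCP. Timeline: 2 weeks',
#      'priority': 'HIGH'}]
# The MEDIUM block is dropped, and at most five actions are returned.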
modules/backlinks.py
CHANGED
@@ -73,14 +73,12 @@ class BacklinksModule:
 )

 def _extract_domain(self, url: str) -> str:
-"""Extract clean domain from URL"""
 if not url.startswith(('http://', 'https://')):
 url = 'https://' + url
 domain = urlparse(url).netloc.replace('www.', '')
 return domain

 def _api_request_with_retry(self, url: str, params: Dict = None, headers: Dict = None) -> Optional[Dict]:
-"""Make API request with retry logic"""
 if headers is None:
 headers = self.headers.copy()

@@ -90,8 +88,8 @@

 if response.status_code == 200:
 return response.json()
-elif response.status_code == 429:
-wait_time = (attempt + 1) * 2
+elif response.status_code == 429:
+wait_time = (attempt + 1) * 2
 print(f"Rate limited, waiting {wait_time}s...")
 time.sleep(wait_time)
 continue
@@ -124,7 +122,7 @@
 # Limit results for quick scan
 if quick_scan:
 return data[:50]
-return data[:500]
+return data[:500]

 except Exception as e:
 print(f"Individual backlinks API error: {str(e)}")
@@ -132,7 +130,6 @@
 return []

 def _get_majestic_metrics(self, domain: str) -> Dict[str, Any]:
-"""Get Majestic domain metrics via RapidAPI"""
 try:
 headers = self.headers.copy()
 headers['x-rapidapi-host'] = 'majestic1.p.rapidapi.com'
@@ -274,7 +271,7 @@

 # Sort by backlinks count and return top domains
 top_domains = sorted(domain_stats.values(), key=lambda x: x['backlinks'], reverse=True)
-return top_domains[:20]
+return top_domains[:20]

 def _extract_anchor_distribution(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
 """Analyze anchor text distribution"""
@@ -282,7 +279,7 @@

 for link in backlinks:
 anchor = link.get('anchor', '').strip()
-if not anchor or len(anchor) > 100:
+if not anchor or len(anchor) > 100:
 continue

 if anchor not in anchor_stats:
@@ -316,7 +313,7 @@

 # Sort by backlinks count
 anchor_distribution.sort(key=lambda x: x['backlinks'], reverse=True)
-return anchor_distribution[:15]
+return anchor_distribution[:15]

 def _calculate_monthly_changes(self, backlinks: List[Dict]) -> Dict[str, int]:
 """Calculate monthly backlinks changes"""
@@ -335,14 +332,14 @@
 link_date = datetime.strptime(first_seen, '%Y-%m-%d')
 if link_date >= last_month:
 new_links += 1
-if link_date >= now - timedelta(days=90):
+if link_date >= now - timedelta(days=90):
 recent_links += 1
 except Exception:
 continue

 return {
 'new_backlinks': new_links,
-'lost_backlinks': 0,
+'lost_backlinks': 0,
 'net_change': new_links,
 'recent_backlinks_3m': recent_links
 }
@@ -384,9 +381,9 @@

 # Quality score (0-100)
 quality_score = min(100, (
-(follow_ratio * 0.4) +
-(avg_authority * 2) +
-(min(20, len(set(link.get('url_from', '').split('/')[2] for link in backlinks))) * 1)
+(follow_ratio * 0.4) +
+(avg_authority * 2) +
+(min(20, len(set(link.get('url_from', '').split('/')[2] for link in backlinks))) * 1)
 ))

 return {
@@ -398,7 +395,6 @@
 }

 def _get_data_sources(self, individual_backlinks: List, majestic_metrics: Dict, domain_metrics: Dict) -> List[str]:
-"""Track which data sources provided information"""
 sources = []

 if individual_backlinks:
@@ -411,7 +407,6 @@
 return sources or ['No data sources available']

 def _generate_no_api_data(self, url: str) -> ModuleResult:
-"""Generate response when no API key is available"""
 domain = self._extract_domain(url)

 no_api_data = {
modules/content_audit.py
CHANGED
@@ -59,7 +59,6 @@ class ContentAuditModule:
 return self._get_fallback_data(url, str(e))

 def _get_sitemap_urls(self, base_url: str, limit: int = 200) -> List[str]:
-"""Extract URLs from sitemap.xml"""
 urls = []

 # Common sitemap locations
@@ -81,7 +80,6 @@
 return urls[:limit]

 def _parse_sitemap(self, sitemap_content: bytes, base_url: str, limit: int) -> List[str]:
-"""Parse sitemap XML content"""
 urls = []

 try:
@@ -117,7 +115,6 @@
 return urls[:limit]

 def _crawl_from_homepage(self, base_url: str, limit: int = 50) -> List[str]:
-"""Crawl URLs starting from homepage"""
 urls = set([base_url])
 processed = set()

@@ -143,7 +140,6 @@
 return list(urls)[:limit]

 def _analyze_page(self, url: str) -> Dict[str, Any]:
-"""Analyze a single page"""
 try:
 response = self.session.get(url, timeout=15)
 if response.status_code != 200:
@@ -208,7 +204,6 @@
 return soup.get_text()

 def _detect_cta(self, soup: BeautifulSoup) -> bool:
-"""Detect presence of call-to-action elements"""
 text_content = soup.get_text().lower()

 for keyword in self.cta_keywords:
@@ -225,7 +220,6 @@
 return False

 def _get_last_modified(self, headers: Dict, soup: BeautifulSoup) -> str:
-"""Get last modified date from headers or meta tags"""
 # Check headers first
 if 'last-modified' in headers:
 return headers['last-modified']
@@ -240,7 +234,6 @@
 return ""

 def _is_valid_content_url(self, url: str) -> bool:
-"""Check if URL is valid for content analysis"""
 if not url:
 return False

@@ -261,7 +254,6 @@
 return True

 def _is_same_domain(self, url1: str, url2: str) -> bool:
-"""Check if two URLs are from the same domain"""
 try:
 domain1 = urlparse(url1).netloc
 domain2 = urlparse(url2).netloc
@@ -270,7 +262,6 @@
 return False

 def _calculate_metrics(self, base_url: str, pages_data: List[Dict], quick_scan: bool) -> Dict[str, Any]:
-"""Calculate aggregate metrics from page data"""
 total_pages = len(pages_data)
 valid_pages = [p for p in pages_data if 'error' not in p]

@@ -318,7 +309,6 @@
 }

 def _analyze_content_freshness(self, pages_data: List[Dict]) -> Dict[str, Any]:
-"""Analyze content freshness based on last modified dates"""
 now = datetime.now()
 six_months_ago = now - timedelta(days=180)
 eighteen_months_ago = now - timedelta(days=540)
@@ -361,7 +351,6 @@
 }

 def _get_fallback_data(self, url: str, error: str) -> Dict[str, Any]:
-"""Return fallback data when analysis fails"""
 return {
 'url': url,
 'error': f"Content audit failed: {error}",
modules/keywords.py
CHANGED
@@ -118,13 +118,11 @@ class KeywordsModule:
 )

 def _extract_domain(self, url: str) -> str:
-"""Extract domain from URL"""
 if not url.startswith(('http://', 'https://')):
 url = 'https://' + url
 return urlparse(url).netloc.replace('www.', '')

 def _fetch_domain_keywords(self, domain: str, quick_scan: bool) -> Dict[str, Any]:
-"""Fetch keywords data for a domain using Competitors Ranking Keywords API"""
 try:
 all_keywords = []
 offset = 0
@@ -187,7 +185,6 @@
 return {'success': False, 'error': str(e)}

 def _calculate_domain_statistics(self, keywords: List[Dict]) -> Dict[str, Any]:
-"""Calculate domain statistics from keywords data"""
 total_keywords = len(keywords)

 # Position distribution
@@ -221,7 +218,6 @@

 def _process_keywords_data(self, main_data: Dict, competitor_data: Dict,
 domain: str, competitor_domains: List[str]) -> Dict[str, Any]:
-"""Process and structure the keywords data"""
 stats = main_data['statistics']['organic']
 keywords = main_data['keywords']

@@ -288,7 +284,6 @@
 }

 def _identify_best_keywords(self, keywords: List[Dict]) -> List[Dict]:
-"""Identify best performing keywords"""
 best_candidates = [
 k for k in keywords
 if k.get('rank', 100) <= 3 and k.get('estimated_traffic_volume', 0) > 10
@@ -310,7 +305,6 @@
 ]

 def _identify_declining_keywords(self, keywords: List[Dict]) -> List[Dict]:
-"""Identify keywords with declining performance"""
 declining_candidates = []

 for k in keywords:
@@ -333,7 +327,6 @@

 def _analyze_competitor_gaps(self, main_keywords: List[Dict], competitor_data: Dict,
 domain: str, competitor_domains: List[str]) -> Tuple[List[Dict], List[Dict]]:
-"""Analyze competitor gaps and opportunities"""
 opportunities = []
 competitor_summary = []

@@ -385,10 +378,9 @@
 # Sort all opportunities by priority score
 opportunities.sort(key=lambda x: x['priority_score'], reverse=True)

-return opportunities[:50], competitor_summary
+return opportunities[:50], competitor_summary

 def _calculate_opportunity_score(self, competitor_rank: int, search_volume: int, difficulty: int) -> float:
-"""Calculate opportunity score using the PRD algorithm"""
 position_ctr = {1: 28, 2: 15, 3: 11, 4: 8, 5: 7, 10: 2, 20: 1}

 # Find closest CTR value
@@ -406,7 +398,6 @@
 return min(round(score, 1), 100)

 def _estimate_difficulty(self, rank: int, volume: int) -> int:
-"""Estimate keyword difficulty based on rank and volume"""
 # Simple heuristic - in practice, this would come from a keyword difficulty API
 if rank <= 3:
 return 20 + (volume // 1000) * 5
@@ -416,7 +407,6 @@
 return 50 + (volume // 1000) * 2

 def _enrich_keywords_data(self, keywords: List[Dict]) -> List[Dict]:
-"""Enrich keywords with volume and CPC data"""
 # Identify keywords needing enrichment
 keywords_to_enrich = [
 k for k in keywords
@@ -445,7 +435,6 @@
 return enriched_keywords

 def _batch_enrich_keywords(self, keywords: List[str]) -> Dict[str, Dict]:
-"""Batch enrich keywords using Google Keyword Insight API"""
 enriched_data = {}

 # Process in batches
@@ -518,17 +507,14 @@
 return enriched_data

 def _get_cache_key(self, keyword: str) -> str:
-"""Generate cache key for keyword"""
 return hashlib.md5(keyword.lower().encode()).hexdigest()

 def _calculate_enrichment_rate(self, keywords: List[Dict]) -> float:
-"""Calculate the percentage of keywords with volume data"""
 enriched = sum(1 for k in keywords if k.get('avg_search_volume', 0) > 0)
 total = len(keywords)
 return round(enriched / total * 100, 1) if total > 0 else 0

 def _determine_trend(self, keyword_data: Dict) -> str:
-"""Determine keyword trend based on rank changes"""
 current_rank = keyword_data.get('rank', 100)
 previous_rank = keyword_data.get('previous_rank', 100)

@@ -542,13 +528,11 @@
 return 'stable'

 def _rate_limit_primary_api(self):
-"""Rate limiting for primary API (60 requests/minute)"""
 current_time = time.time()
-if current_time - self.last_primary_call < 1:
+if current_time - self.last_primary_call < 1:
 time.sleep(1)

 def _rate_limit_enrichment_api(self):
-"""Rate limiting for enrichment API (100 requests/minute)"""
 current_time = time.time()
-if current_time - self.last_enrichment_call < 0.6:
+if current_time - self.last_enrichment_call < 0.6:
 time.sleep(0.6)
modules/technical_seo.py
CHANGED
@@ -4,12 +4,6 @@ from typing import Dict, Any, Optional

 class TechnicalSEOModule:
 def __init__(self, api_key: Optional[str] = None):
-"""
-Initialize Technical SEO module
-
-Args:
-api_key: Google PageSpeed Insights API key (optional for basic usage)
-"""
 self.api_key = api_key
 self.base_url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"

@@ -45,7 +39,6 @@
 return self._get_fallback_data(url, str(e))

 def _get_pagespeed_data(self, url: str, strategy: str) -> Dict[str, Any]:
-"""Get PageSpeed Insights data for URL and strategy"""
 params = {
 'url': url,
 'strategy': strategy,
@@ -64,7 +57,6 @@
 raise

 def _extract_metrics(self, data: Dict[str, Any], strategy: str) -> Dict[str, Any]:
-"""Extract key performance metrics from PageSpeed data"""
 lighthouse_result = data.get('lighthouseResult', {})
 categories = lighthouse_result.get('categories', {})
 audits = lighthouse_result.get('audits', {})
@@ -91,7 +83,6 @@
 }

 def _extract_core_web_vitals(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
-"""Extract Core Web Vitals metrics"""
 def get_metric_value(data, metric_key):
 audits = data.get('lighthouseResult', {}).get('audits', {})
 metric = audits.get(metric_key, {})
@@ -116,7 +107,6 @@
 }

 def _extract_opportunities(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
-"""Extract optimization opportunities"""
 mobile_audits = mobile_data.get('lighthouseResult', {}).get('audits', {})

 opportunities = []
@@ -128,7 +118,7 @@

 for key in opportunity_keys:
 audit = mobile_audits.get(key, {})
-if audit.get('score', 1) < 0.9:
+if audit.get('score', 1) < 0.9:
 opportunities.append({
 'id': key,
 'title': audit.get('title', key.replace('-', ' ').title()),
@@ -137,10 +127,9 @@
 'potential_savings': audit.get('details', {}).get('overallSavingsMs', 0)
 })

-return {'opportunities': opportunities[:5]}
+return {'opportunities': opportunities[:5]}

 def _extract_diagnostics(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
-"""Extract diagnostic information"""
 mobile_audits = mobile_data.get('lighthouseResult', {}).get('audits', {})

 diagnostics = []
@@ -162,7 +151,6 @@
 return {'diagnostics': diagnostics}

 def _get_fallback_data(self, url: str, error: str) -> Dict[str, Any]:
-"""Return fallback data when API fails"""
 return {
 'url': url,
 'error': f"PageSpeed API unavailable: {error}",
report_generator.py
CHANGED
@@ -5,11 +5,45 @@ import plotly.graph_objects as go
import plotly.express as px
from plotly.offline import plot
import plotly

class ReportGenerator:
    def __init__(self):
        self.report_template = self._get_report_template()

    def generate_html_report(self, url: str, technical_data: Dict[str, Any],
                             content_data: Dict[str, Any], competitor_data: List[Dict] = None,
                             keywords_data: Dict[str, Any] = None, backlinks_data: Dict[str, Any] = None,
@@ -44,8 +78,7 @@ class ReportGenerator:
        if competitor_data:
            competitor_section = self._generate_competitor_section(competitor_data, technical_data, content_data)

-
-        placeholder_sections = self._generate_placeholder_sections()

        # Generate recommendations
        recommendations = self._generate_recommendations(technical_data, content_data)
@@ -61,7 +94,7 @@ class ReportGenerator:
            keywords_section=keywords_section,
            backlinks_section=backlinks_section,
            competitor_section=competitor_section,
-
            recommendations=recommendations,
            llm_recommendations=recommendations_section
        )
@@ -538,50 +571,7 @@ class ReportGenerator:

        return comparison_html

-
-        """Generate placeholder sections for future modules"""
-        return """
-        <div class="placeholder-sections">
-            <div class="placeholder-section">
-                <h3>🔍 Keyword Rankings</h3>
-                <div class="placeholder-content">
-                    <p><em>Coming in future versions</em></p>
-                    <ul>
-                        <li>Google Search Console integration</li>
-                        <li>Keyword ranking positions</li>
-                        <li>Search volume analysis</li>
-                        <li>Keyword opportunities</li>
-                    </ul>
-                </div>
-            </div>
-
-            <div class="placeholder-section">
-                <h3>🔗 Backlink Profile</h3>
-                <div class="placeholder-content">
-                    <p><em>Coming in future versions</em></p>
-                    <ul>
-                        <li>Total backlinks and referring domains</li>
-                        <li>Domain authority metrics</li>
-                        <li>Anchor text analysis</li>
-                        <li>Link acquisition opportunities</li>
-                    </ul>
-                </div>
-            </div>
-
-            <div class="placeholder-section">
-                <h3>📈 Conversion Tracking</h3>
-                <div class="placeholder-content">
-                    <p><em>Coming in future versions</em></p>
-                    <ul>
-                        <li>Google Analytics integration</li>
-                        <li>Organic traffic conversion rates</li>
-                        <li>Goal completion tracking</li>
-                        <li>Revenue attribution</li>
-                    </ul>
-                </div>
-            </div>
-        </div>
-        """

    def _generate_recommendations(self, technical_data: Dict[str, Any], content_data: Dict[str, Any]) -> str:
        """Generate prioritized recommendations"""
@@ -830,11 +820,11 @@ class ReportGenerator:
        """

    def _generate_recommendations_section(self, llm_recommendations: Dict[str, Any]) -> str:
-        """Generate LLM-powered recommendations section"""
        if not llm_recommendations:
            return ""

-
        executive_insights = llm_recommendations.get('executive_insights', [])
        priority_actions = llm_recommendations.get('priority_actions', [])

@@ -861,12 +851,17 @@ class ReportGenerator:
        """
        priority_html += "</div>"

        recommendations_html = ""
-        if
-            recommendations_html = "
-
-
-

        return f"""
        <div class="card">
@@ -1258,6 +1253,56 @@ class ReportGenerator:
            text-align: center;
        }}

        @media (max-width: 768px) {{
            .report-container {{
                padding: 10px;
import plotly.express as px
from plotly.offline import plot
import plotly
+import re

class ReportGenerator:
    def __init__(self):
        self.report_template = self._get_report_template()

+    def _markdown_to_html(self, markdown_text: str) -> str:
+        """Convert simple markdown to HTML"""
+        if not markdown_text:
+            return ""
+
+        html = markdown_text
+
+        # Convert headers
+        html = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', html, flags=re.MULTILINE)
+        html = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', html, flags=re.MULTILINE)
+        html = re.sub(r'^# (.*?)$', r'<h1>\1</h1>', html, flags=re.MULTILINE)
+
+        # Convert bold text
+        html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', html)
+
+        # Convert bullet points
+        html = re.sub(r'^- (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
+        html = re.sub(r'^• (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
+
+        # Wrap consecutive <li> tags in <ul>
+        html = re.sub(r'(<li>.*?</li>(?:\s*<li>.*?</li>)*)', r'<ul>\1</ul>', html, flags=re.DOTALL)
+
+        # Convert line breaks to <br> tags
+        html = html.replace('\n', '<br>')
+
+        # Clean up extra <br> tags around block elements
+        html = re.sub(r'<br>\s*(<h[1-6]>)', r'\1', html)
+        html = re.sub(r'(</h[1-6]>)\s*<br>', r'\1', html)
+        html = re.sub(r'<br>\s*(<ul>)', r'\1', html)
+        html = re.sub(r'(</ul>)\s*<br>', r'\1', html)
+
+        return html
+
    def generate_html_report(self, url: str, technical_data: Dict[str, Any],
                             content_data: Dict[str, Any], competitor_data: List[Dict] = None,
                             keywords_data: Dict[str, Any] = None, backlinks_data: Dict[str, Any] = None,
...
        if competitor_data:
            competitor_section = self._generate_competitor_section(competitor_data, technical_data, content_data)

+

        # Generate recommendations
        recommendations = self._generate_recommendations(technical_data, content_data)
...
            keywords_section=keywords_section,
            backlinks_section=backlinks_section,
            competitor_section=competitor_section,
+
            recommendations=recommendations,
            llm_recommendations=recommendations_section
        )
...

        return comparison_html

+

    def _generate_recommendations(self, technical_data: Dict[str, Any], content_data: Dict[str, Any]) -> str:
        """Generate prioritized recommendations"""
...
        """

    def _generate_recommendations_section(self, llm_recommendations: Dict[str, Any]) -> str:
+        """Generate LLM-powered recommendations section with markdown rendering"""
        if not llm_recommendations:
            return ""

+        recommendations_markdown = llm_recommendations.get('recommendations_markdown', '')
        executive_insights = llm_recommendations.get('executive_insights', [])
        priority_actions = llm_recommendations.get('priority_actions', [])

...
        """
        priority_html += "</div>"

+        # Convert markdown recommendations to HTML
        recommendations_html = ""
+        if recommendations_markdown:
+            recommendations_html = f"""
+            <div class='llm-recommendations'>
+                <h4>🤖 AI-Generated Recommendations</h4>
+                <div class="markdown-content">
+                    {self._markdown_to_html(recommendations_markdown)}
+                </div>
+            </div>
+            """

        return f"""
        <div class="card">
...
            text-align: center;
        }}

+        .markdown-content {{
+            line-height: 1.6;
+            color: #2c3e50;
+        }}
+
+        .markdown-content h1 {{
+            color: #2c3e50;
+            border-bottom: 2px solid #3498db;
+            padding-bottom: 10px;
+            margin-top: 30px;
+            margin-bottom: 20px;
+        }}
+
+        .markdown-content h2 {{
+            color: #34495e;
+            margin-top: 25px;
+            margin-bottom: 15px;
+            font-size: 1.3em;
+        }}
+
+        .markdown-content h3 {{
+            color: #34495e;
+            margin-top: 20px;
+            margin-bottom: 10px;
+            font-size: 1.1em;
+        }}
+
+        .markdown-content strong {{
+            color: #2c3e50;
+            font-weight: 600;
+        }}
+
+        .markdown-content ul {{
+            margin: 15px 0;
+            padding-left: 20px;
+        }}
+
+        .markdown-content li {{
+            margin-bottom: 8px;
+            line-height: 1.5;
+        }}
+
+        .llm-recommendations {{
+            background: #f8f9fa;
+            border-left: 4px solid #3498db;
+            padding: 20px;
+            margin: 20px 0;
+            border-radius: 0 8px 8px 0;
+        }}
+
        @media (max-width: 768px) {{
            .report-container {{
                padding: 10px;
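Note: the new _markdown_to_html helper only uses the re module and its argument, so it can be exercised on its own. A quick sketch (not part of this commit; assumes report_generator.py is importable from the Space's working directory, and uses object.__new__ purely to skip the template construction done in __init__):

# try_markdown.py -- illustrative only
from report_generator import ReportGenerator

gen = object.__new__(ReportGenerator)  # bypass __init__ / _get_report_template()
sample = "## Priority Actions\n**Fix metadata** on key pages\n- Compress hero images\n- Add alt text to product photos"
print(gen._markdown_to_html(sample))   # headers -> <h2>, bold -> <strong>, bullets wrapped in <ul><li>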
simple_pdf_generator.py
CHANGED
@@ -17,13 +17,10 @@ class SimplePDFGenerator:
        self.available = False

    def generate_pdf(self, html_content: str) -> bytes:
-        """
-        Generate PDF from HTML content with better formatting
-        """
        if not self.available:
            raise ImportError("PDF generation requires reportlab: pip install reportlab")

-
        from reportlab.pdfgen import canvas
        from reportlab.lib.pagesizes import letter, A4
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
@@ -36,10 +33,10 @@ class SimplePDFGenerator:
        # Parse HTML and extract content
        soup = BeautifulSoup(html_content, 'html.parser')

-
        buffer = io.BytesIO()

-
        doc = SimpleDocTemplate(
            buffer,
            pagesize=A4,
@@ -49,17 +46,17 @@ class SimplePDFGenerator:
            rightMargin=0.75*inch
        )

-
        styles = getSampleStyleSheet()

-
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            textColor=black,
            spaceAfter=20,
-            alignment=1
        )

        header_style = ParagraphStyle(
@@ -82,7 +79,7 @@ class SimplePDFGenerator:

        story = []

-
        title = "SEO Analysis Report"
        url_elem = soup.find(string=re.compile(r'https?://'))
        if url_elem:
@@ -93,13 +90,13 @@ class SimplePDFGenerator:
        story.append(Paragraph(title, title_style))
        story.append(Spacer(1, 20))

-
        self._extract_executive_summary(soup, story, header_style, styles['Normal'])
        self._extract_technical_seo(soup, story, header_style, subheader_style, styles['Normal'])
        self._extract_content_audit(soup, story, header_style, subheader_style, styles['Normal'])
        self._extract_recommendations(soup, story, header_style, styles['Normal'])

-
        doc.build(story)

        # Get PDF data
@@ -107,12 +104,11 @@ class SimplePDFGenerator:
        return buffer.getvalue()

    def _extract_executive_summary(self, soup, story, header_style, normal_style):
-        """Extract executive summary section"""
        exec_section = soup.find(string=re.compile(r'Executive Summary', re.I))
        if exec_section:
            story.append(Paragraph("Executive Summary", header_style))

-
            health_text = soup.find(string=re.compile(r'Overall SEO Health', re.I))
            if health_text:
                parent = health_text.find_parent()
@@ -122,14 +118,13 @@ class SimplePDFGenerator:
            story.append(Spacer(1, 10))

    def _extract_technical_seo(self, soup, story, header_style, subheader_style, normal_style):
-        """Extract technical SEO section"""
        tech_section = soup.find(string=re.compile(r'Technical SEO', re.I))
        if tech_section:
            story.append(Paragraph("Technical SEO Analysis", header_style))

-
            perf_elements = soup.find_all(string=re.compile(r'Performance Score|Mobile|Desktop', re.I))
-            for elem in perf_elements[:3]:
                parent = elem.find_parent()
                if parent:
                    text = parent.get_text().strip()
@@ -138,14 +133,13 @@ class SimplePDFGenerator:
            story.append(Spacer(1, 10))

    def _extract_content_audit(self, soup, story, header_style, subheader_style, normal_style):
-        """Extract content audit section"""
        content_section = soup.find(string=re.compile(r'Content Audit', re.I))
        if content_section:
            story.append(Paragraph("Content Audit", header_style))

-
            content_elements = soup.find_all(string=re.compile(r'Pages Analyzed|Metadata|Word Count', re.I))
-            for elem in content_elements[:3]:
                parent = elem.find_parent()
                if parent:
                    text = parent.get_text().strip()
@@ -154,23 +148,19 @@ class SimplePDFGenerator:
            story.append(Spacer(1, 10))

    def _extract_recommendations(self, soup, story, header_style, normal_style):
-        """Extract recommendations section"""
        rec_section = soup.find(string=re.compile(r'Recommendation', re.I))
        if rec_section:
            story.append(Paragraph("Recommendations", header_style))

-
            rec_elements = soup.find_all('li')
-            for elem in rec_elements[:5]:
                text = elem.get_text().strip()
                if len(text) > 15:
                    story.append(Paragraph(f"• {text}", normal_style))
            story.append(Spacer(1, 10))

def create_browser_pdf_instructions() -> str:
-    """
-    Return instructions for manual PDF creation using browser
-    """
    return """
    ## How to Create PDF from HTML Report:

        self.available = False

    def generate_pdf(self, html_content: str) -> bytes:
        if not self.available:
            raise ImportError("PDF generation requires reportlab: pip install reportlab")

+
        from reportlab.pdfgen import canvas
        from reportlab.lib.pagesizes import letter, A4
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
...
        # Parse HTML and extract content
        soup = BeautifulSoup(html_content, 'html.parser')

+
        buffer = io.BytesIO()

+
        doc = SimpleDocTemplate(
            buffer,
            pagesize=A4,
...
            rightMargin=0.75*inch
        )

+
        styles = getSampleStyleSheet()

+
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            textColor=black,
            spaceAfter=20,
+            alignment=1
        )

        header_style = ParagraphStyle(
...

        story = []

+
        title = "SEO Analysis Report"
        url_elem = soup.find(string=re.compile(r'https?://'))
        if url_elem:
...
        story.append(Paragraph(title, title_style))
        story.append(Spacer(1, 20))

+
        self._extract_executive_summary(soup, story, header_style, styles['Normal'])
        self._extract_technical_seo(soup, story, header_style, subheader_style, styles['Normal'])
        self._extract_content_audit(soup, story, header_style, subheader_style, styles['Normal'])
        self._extract_recommendations(soup, story, header_style, styles['Normal'])

+
        doc.build(story)

        # Get PDF data
...
        return buffer.getvalue()

    def _extract_executive_summary(self, soup, story, header_style, normal_style):
        exec_section = soup.find(string=re.compile(r'Executive Summary', re.I))
        if exec_section:
            story.append(Paragraph("Executive Summary", header_style))

+
            health_text = soup.find(string=re.compile(r'Overall SEO Health', re.I))
            if health_text:
                parent = health_text.find_parent()
...
            story.append(Spacer(1, 10))

    def _extract_technical_seo(self, soup, story, header_style, subheader_style, normal_style):
        tech_section = soup.find(string=re.compile(r'Technical SEO', re.I))
        if tech_section:
            story.append(Paragraph("Technical SEO Analysis", header_style))

+
            perf_elements = soup.find_all(string=re.compile(r'Performance Score|Mobile|Desktop', re.I))
+            for elem in perf_elements[:3]:
                parent = elem.find_parent()
                if parent:
                    text = parent.get_text().strip()
...
            story.append(Spacer(1, 10))

    def _extract_content_audit(self, soup, story, header_style, subheader_style, normal_style):
        content_section = soup.find(string=re.compile(r'Content Audit', re.I))
        if content_section:
            story.append(Paragraph("Content Audit", header_style))

+
            content_elements = soup.find_all(string=re.compile(r'Pages Analyzed|Metadata|Word Count', re.I))
+            for elem in content_elements[:3]:
                parent = elem.find_parent()
                if parent:
                    text = parent.get_text().strip()
...
            story.append(Spacer(1, 10))

    def _extract_recommendations(self, soup, story, header_style, normal_style):
        rec_section = soup.find(string=re.compile(r'Recommendation', re.I))
        if rec_section:
            story.append(Paragraph("Recommendations", header_style))

+
            rec_elements = soup.find_all('li')
+            for elem in rec_elements[:5]:
                text = elem.get_text().strip()
                if len(text) > 15:
                    story.append(Paragraph(f"• {text}", normal_style))
            story.append(Spacer(1, 10))

def create_browser_pdf_instructions() -> str:
    return """
    ## How to Create PDF from HTML Report:
