yashgori20 committed
Commit 9bf19c4 · 1 Parent(s): 795900a
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # Thinkly Labs SEO - Flask Application
2
  from flask import Flask, render_template, request, jsonify, send_file, redirect, url_for
3
  import validators
4
  import os
@@ -7,7 +7,7 @@ import uuid
7
  from urllib.parse import urlparse
8
  from typing import Dict, Any
9
 
10
- # Import SEO modules
11
  from modules.technical_seo import TechnicalSEOModule
12
  from modules.content_audit import ContentAuditModule
13
  from modules.keywords import KeywordsModule
@@ -19,7 +19,7 @@ from llm_recommendations import LLMRecommendations
19
  app = Flask(__name__, static_folder='static')
20
  app.secret_key = 'seo_report_generator_2024'
21
 
22
- # Initialize modules
23
  technical_module = TechnicalSEOModule()
24
  content_module = ContentAuditModule()
25
  keywords_module = KeywordsModule()
@@ -28,11 +28,10 @@ report_gen = ReportGenerator()
28
  pdf_gen = SimplePDFGenerator()
29
  llm_recommendations = LLMRecommendations()
30
 
31
- # Store for generated reports (in production, use database)
32
  reports_store = {}
33
 
34
  def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
35
- """Transform new keywords data structure to match report generator expectations"""
36
  if not new_data or new_data.get('placeholder'):
37
  return {
38
  'placeholder': True,
@@ -44,7 +43,7 @@ def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
44
  'data_source': 'Analysis failed'
45
  }
46
 
47
- # Transform new structure to old structure
48
  totals = new_data.get('totals', {})
49
  distribution = new_data.get('distribution', {})
50
  movement = new_data.get('movement', {})
@@ -53,7 +52,7 @@ def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
53
  opportunities = new_data.get('opportunities', [])
54
  data_sources = new_data.get('data_sources', {})
55
 
56
- # Transform position distribution
57
  pos_dist = {
58
  'top_3': distribution.get('top3', 0),
59
  'top_10': distribution.get('top10', 0),
@@ -61,27 +60,27 @@ def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
61
  'beyond_50': totals.get('keywords', 0) - distribution.get('top50', 0)
62
  }
63
 
64
- # Transform best keywords to match expected format
65
  transformed_best_keywords = []
66
  for kw in best_keywords:
67
  transformed_best_keywords.append({
68
  'keyword': kw.get('keyword', ''),
69
  'position': kw.get('rank', 0),
70
- 'clicks': 0, # Not available in new API
71
  'impressions': kw.get('volume', 0),
72
  'url': kw.get('url', ''),
73
  'estimated_traffic': kw.get('estimated_traffic', 0),
74
  'trend': kw.get('trend', 'stable')
75
  })
76
 
77
- # Transform opportunities to match expected format
78
  transformed_opportunities = []
79
  for opp in opportunities:
80
  transformed_opportunities.append({
81
  'keyword': opp.get('keyword', ''),
82
- 'position': 0, # Not applicable for opportunities
83
  'impressions': opp.get('volume', 0),
84
- 'ctr': 0, # Not available
85
  'competitor_rank': opp.get('competitor_rank', 0),
86
  'priority_score': opp.get('priority_score', 0),
87
  'competitor_domain': opp.get('competitor_domain', '')
@@ -119,30 +118,30 @@ def generate_report():
119
  if not validators.url(url):
120
  return jsonify({'error': 'Please enter a valid URL'}), 400
121
 
122
- # Generate unique report ID
123
  report_id = str(uuid.uuid4())
124
 
125
- # Validate competitor URLs and extract domains
126
  competitor_domains = []
127
  competitor_list = []
128
  for comp in competitors:
129
  comp = comp.strip()
130
  if comp and validators.url(comp):
131
  competitor_list.append(comp)
132
- # Extract domain from competitor URL
133
  domain = urlparse(comp).netloc.replace('www.', '')
134
  competitor_domains.append(domain)
135
 
136
- # Technical SEO Analysis
137
  technical_data = technical_module.analyze(url)
138
 
139
- # Content Audit
140
  content_data = content_module.analyze(url)
141
 
142
- # Keywords Analysis - UPDATED: Pass competitor domains and handle errors
143
  keywords_result = keywords_module.analyze(url, competitor_domains=competitor_domains)
144
  if not keywords_result.success:
145
- # Fallback to placeholder data if keywords analysis fails
146
  keywords_data = {
147
  'placeholder': True,
148
  'message': f'Keywords analysis failed: {keywords_result.error}',
@@ -153,10 +152,10 @@ def generate_report():
153
  'data_source': 'Analysis failed'
154
  }
155
  else:
156
- # Transform new data structure to match report generator expectations
157
  keywords_data = _transform_keywords_data(keywords_result.data)
158
 
159
- # Backlinks Analysis - UNCOMMENTED: Enable backlinks analysis
160
  print(f"DEBUG: Starting backlinks analysis for {url}")
161
  backlinks_result = backlinks_module.analyze(url)
162
  backlinks_data = backlinks_result.data
@@ -167,18 +166,18 @@ def generate_report():
167
  if backlinks_data.get('placeholder'):
168
  print(f"DEBUG: Using placeholder data: {backlinks_data.get('message')}")
169
 
170
- # Generate LLM Recommendations
171
  llm_rec_data = llm_recommendations.generate_recommendations(
172
  url, technical_data, content_data, keywords_data, backlinks_data
173
  )
174
 
175
- # Competitor Analysis - UPDATED: Pass competitor domains
176
  competitor_data = []
177
  for comp_url in competitor_list:
178
  comp_technical = technical_module.analyze(comp_url)
179
  comp_content = content_module.analyze(comp_url, quick_scan=True)
180
 
181
- # Keywords analysis for competitor (no competitors for competitor analysis)
182
  comp_keywords_result = keywords_module.analyze(comp_url, competitor_domains=[], quick_scan=True)
183
  if comp_keywords_result.success:
184
  comp_keywords = _transform_keywords_data(comp_keywords_result.data)
@@ -193,7 +192,7 @@ def generate_report():
193
  'data_source': 'Analysis failed'
194
  }
195
 
196
- # Backlinks analysis for competitor - UNCOMMENTED
197
  comp_backlinks_result = backlinks_module.analyze(comp_url, quick_scan=True)
198
  comp_backlinks = comp_backlinks_result.data
199
 
@@ -205,7 +204,7 @@ def generate_report():
205
  'backlinks': comp_backlinks
206
  })
207
 
208
- # Generate HTML report
209
  report_html = report_gen.generate_html_report(
210
  url=url,
211
  technical_data=technical_data,
@@ -217,7 +216,7 @@ def generate_report():
217
  include_charts=True
218
  )
219
 
220
- # Store report
221
  reports_store[report_id] = {
222
  'url': url,
223
  'html': report_html,
@@ -256,7 +255,6 @@ def download_html(report_id):
256
 
257
  report_data = reports_store[report_id]
258
 
259
- # Create temporary file
260
  with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as f:
261
  f.write(report_data['html'])
262
  temp_path = f.name
@@ -273,10 +271,10 @@ def download_pdf(report_id):
273
  try:
274
  report_data = reports_store[report_id]
275
 
276
- # Generate PDF
277
  pdf_data = pdf_gen.generate_pdf(report_data['html'])
278
 
279
- # Create temporary file
280
  with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:
281
  f.write(pdf_data)
282
  temp_path = f.name
 
1
+
2
  from flask import Flask, render_template, request, jsonify, send_file, redirect, url_for
3
  import validators
4
  import os
 
7
  from urllib.parse import urlparse
8
  from typing import Dict, Any
9
 
10
+
11
  from modules.technical_seo import TechnicalSEOModule
12
  from modules.content_audit import ContentAuditModule
13
  from modules.keywords import KeywordsModule
 
19
  app = Flask(__name__, static_folder='static')
20
  app.secret_key = 'seo_report_generator_2024'
21
 
22
+
23
  technical_module = TechnicalSEOModule()
24
  content_module = ContentAuditModule()
25
  keywords_module = KeywordsModule()
 
28
  pdf_gen = SimplePDFGenerator()
29
  llm_recommendations = LLMRecommendations()
30
 
31
+
32
  reports_store = {}
33
 
34
  def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
 
35
  if not new_data or new_data.get('placeholder'):
36
  return {
37
  'placeholder': True,
 
43
  'data_source': 'Analysis failed'
44
  }
45
 
46
+
47
  totals = new_data.get('totals', {})
48
  distribution = new_data.get('distribution', {})
49
  movement = new_data.get('movement', {})
 
52
  opportunities = new_data.get('opportunities', [])
53
  data_sources = new_data.get('data_sources', {})
54
 
55
+
56
  pos_dist = {
57
  'top_3': distribution.get('top3', 0),
58
  'top_10': distribution.get('top10', 0),
 
60
  'beyond_50': totals.get('keywords', 0) - distribution.get('top50', 0)
61
  }
62
 
63
+
64
  transformed_best_keywords = []
65
  for kw in best_keywords:
66
  transformed_best_keywords.append({
67
  'keyword': kw.get('keyword', ''),
68
  'position': kw.get('rank', 0),
69
+ 'clicks': 0,
70
  'impressions': kw.get('volume', 0),
71
  'url': kw.get('url', ''),
72
  'estimated_traffic': kw.get('estimated_traffic', 0),
73
  'trend': kw.get('trend', 'stable')
74
  })
75
 
76
+
77
  transformed_opportunities = []
78
  for opp in opportunities:
79
  transformed_opportunities.append({
80
  'keyword': opp.get('keyword', ''),
81
+ 'position': 0,
82
  'impressions': opp.get('volume', 0),
83
+ 'ctr': 0,
84
  'competitor_rank': opp.get('competitor_rank', 0),
85
  'priority_score': opp.get('priority_score', 0),
86
  'competitor_domain': opp.get('competitor_domain', '')
 
118
  if not validators.url(url):
119
  return jsonify({'error': 'Please enter a valid URL'}), 400
120
 
121
+
122
  report_id = str(uuid.uuid4())
123
 
124
+
125
  competitor_domains = []
126
  competitor_list = []
127
  for comp in competitors:
128
  comp = comp.strip()
129
  if comp and validators.url(comp):
130
  competitor_list.append(comp)
131
+
132
  domain = urlparse(comp).netloc.replace('www.', '')
133
  competitor_domains.append(domain)
134
 
135
+
136
  technical_data = technical_module.analyze(url)
137
 
138
+
139
  content_data = content_module.analyze(url)
140
 
141
+
142
  keywords_result = keywords_module.analyze(url, competitor_domains=competitor_domains)
143
  if not keywords_result.success:
144
+
145
  keywords_data = {
146
  'placeholder': True,
147
  'message': f'Keywords analysis failed: {keywords_result.error}',
 
152
  'data_source': 'Analysis failed'
153
  }
154
  else:
155
+
156
  keywords_data = _transform_keywords_data(keywords_result.data)
157
 
158
+
159
  print(f"DEBUG: Starting backlinks analysis for {url}")
160
  backlinks_result = backlinks_module.analyze(url)
161
  backlinks_data = backlinks_result.data
 
166
  if backlinks_data.get('placeholder'):
167
  print(f"DEBUG: Using placeholder data: {backlinks_data.get('message')}")
168
 
169
+
170
  llm_rec_data = llm_recommendations.generate_recommendations(
171
  url, technical_data, content_data, keywords_data, backlinks_data
172
  )
173
 
174
+
175
  competitor_data = []
176
  for comp_url in competitor_list:
177
  comp_technical = technical_module.analyze(comp_url)
178
  comp_content = content_module.analyze(comp_url, quick_scan=True)
179
 
180
+
181
  comp_keywords_result = keywords_module.analyze(comp_url, competitor_domains=[], quick_scan=True)
182
  if comp_keywords_result.success:
183
  comp_keywords = _transform_keywords_data(comp_keywords_result.data)
 
192
  'data_source': 'Analysis failed'
193
  }
194
 
195
+
196
  comp_backlinks_result = backlinks_module.analyze(comp_url, quick_scan=True)
197
  comp_backlinks = comp_backlinks_result.data
198
 
 
204
  'backlinks': comp_backlinks
205
  })
206
 
207
+
208
  report_html = report_gen.generate_html_report(
209
  url=url,
210
  technical_data=technical_data,
 
216
  include_charts=True
217
  )
218
 
219
+
220
  reports_store[report_id] = {
221
  'url': url,
222
  'html': report_html,
 
255
 
256
  report_data = reports_store[report_id]
257
 
 
258
  with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as f:
259
  f.write(report_data['html'])
260
  temp_path = f.name
 
271
  try:
272
  report_data = reports_store[report_id]
273
 
274
+
275
  pdf_data = pdf_gen.generate_pdf(report_data['html'])
276
 
277
+
278
  with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:
279
  f.write(pdf_data)
280
  temp_path = f.name
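Illustration only (not part of this commit): the competitor handling above reduces each competitor URL to a bare domain before passing it to the keywords module. A minimal sketch of that normalization:

    from urllib.parse import urlparse

    def to_domain(url: str) -> str:
        # Same normalization as in generate_report(): keep the host, drop a leading "www."
        return urlparse(url).netloc.replace('www.', '')

    assert to_domain('https://www.example.com/pricing') == 'example.com'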
llm_recommendations.py CHANGED
@@ -1,7 +1,4 @@
1
- """
2
- Groq LLM Integration for Smart SEO Recommendations
3
- Analyzes all 4 modules (Technical SEO, Content Audit, Keywords, Backlinks) to generate intelligent recommendations
4
- """
5
 
6
  import os
7
  import json
@@ -9,7 +6,7 @@ from typing import Dict, Any, List
9
  from groq import Groq
10
  from dotenv import load_dotenv
11
 
12
- # Load environment variables
13
  load_dotenv()
14
 
15
 
@@ -25,33 +22,20 @@ class LLMRecommendations:
25
  def generate_recommendations(self, url: str, technical_data: Dict[str, Any],
26
  content_data: Dict[str, Any], keywords_data: Dict[str, Any],
27
  backlinks_data: Dict[str, Any]) -> Dict[str, Any]:
28
- """
29
- Generate comprehensive SEO recommendations based on all module data
30
-
31
- Args:
32
- url: Target website URL
33
- technical_data: Technical SEO analysis results
34
- content_data: Content audit results
35
- keywords_data: Keywords analysis results
36
- backlinks_data: Backlinks analysis results
37
-
38
- Returns:
39
- Dictionary with recommendations and insights
40
- """
41
  if not self.available:
42
  return self._generate_fallback_recommendations(technical_data, content_data, keywords_data, backlinks_data)
43
 
44
  try:
45
- # Prepare context data for LLM
46
  context = self._prepare_context(url, technical_data, content_data, keywords_data, backlinks_data)
47
 
48
- # Generate recommendations using Groq
49
  recommendations = self._query_llm(context)
50
 
51
  return {
52
- 'recommendations': recommendations,
53
  'executive_insights': self._generate_executive_insights(context),
54
- 'priority_actions': self._extract_priority_actions(recommendations),
55
  'data_source': 'Groq LLM Analysis',
56
  'generated_at': context['analysis_date']
57
  }
@@ -61,9 +45,8 @@ class LLMRecommendations:
61
 
62
  def _prepare_context(self, url: str, technical_data: Dict, content_data: Dict,
63
  keywords_data: Dict, backlinks_data: Dict) -> Dict[str, Any]:
64
- """Prepare structured context for LLM analysis"""
65
 
66
- # Extract key metrics from each module
67
  context = {
68
  'website': url,
69
  'analysis_date': technical_data.get('last_updated', ''),
@@ -101,7 +84,6 @@ class LLMRecommendations:
101
  return context
102
 
103
  def _query_llm(self, context: Dict[str, Any]) -> List[str]:
104
- """Query Groq LLM for SEO recommendations"""
105
 
106
  prompt = f"""
107
  You are an expert SEO consultant analyzing a comprehensive SEO audit for {context['website']}. Based on the data below, provide specific, actionable SEO recommendations.
@@ -143,12 +125,18 @@ CRITICAL INSTRUCTIONS:
143
  5. Prioritize recommendations by potential impact and ease of implementation
144
  6. Include technical optimizations, content improvements, keyword opportunities, and link building strategies
145
  7. Provide estimated timelines and resources needed for each recommendation
 
 
146
 
147
- Generate exactly 8-12 specific recommendations in this format:
148
- - **[Priority Level]** [Specific Action]: [Detailed explanation with steps and expected impact]
 
 
 
149
 
150
  Priority Levels: HIGH, MEDIUM, LOW
151
  Focus on actionable items that can be implemented within 30-90 days.
 
152
 
153
  Response:
154
  """
@@ -158,35 +146,25 @@ Response:
158
  messages=[
159
  {'role': 'user', 'content': prompt}
160
  ],
161
- model="mixtral-8x7b-32768", # Using Mixtral for better reasoning
162
  stream=False,
163
- temperature=0.1, # Low temperature for consistent, focused recommendations
164
  max_tokens=1500
165
  )
166
 
167
  response = chat_completion.choices[0].message.content.strip()
168
 
169
- # Parse recommendations from response
170
- recommendations = []
171
- lines = response.split('\n')
172
- for line in lines:
173
- line = line.strip()
174
- if line.startswith('- **') or line.startswith('•'):
175
- # Clean up the recommendation
176
- recommendation = line.replace('- **', '').replace('• **', '').strip()
177
- if recommendation:
178
- recommendations.append(recommendation)
179
-
180
- return recommendations if recommendations else [response]
181
 
182
  except Exception as e:
183
  return [f"LLM Error: {str(e)}"]
184
 
185
  def _generate_executive_insights(self, context: Dict[str, Any]) -> List[str]:
186
- """Generate high-level executive insights"""
187
  insights = []
188
 
189
- # Technical Performance Insight
190
  mobile_score = context['technical_seo']['mobile_score']
191
  desktop_score = context['technical_seo']['desktop_score']
192
  avg_score = (mobile_score + desktop_score) / 2
@@ -198,7 +176,7 @@ Response:
198
  else:
199
  insights.append(f"🟢 Good: Website performance is solid (avg: {avg_score:.0f}/100)")
200
 
201
- # Content Insight
202
  pages = context['content_audit']['pages_analyzed']
203
  if pages > 0:
204
  metadata = context['content_audit']['metadata_completeness']
@@ -209,7 +187,7 @@ Response:
209
  else:
210
  insights.append(f"🟢 Content Quality: Metadata completeness is good ({title_pct:.0f}%)")
211
 
212
- # Keywords Insight
213
  if context['keywords']['data_available']:
214
  total_keywords = context['keywords']['total_keywords']
215
  pos_dist = context['keywords']['position_distribution']
@@ -224,7 +202,7 @@ Response:
224
  else:
225
  insights.append("📊 Connect keyword tracking tools for visibility insights")
226
 
227
- # Backlinks Insight
228
  if context['backlinks']['data_available']:
229
  ref_domains = context['backlinks']['total_ref_domains']
230
  domain_rating = context['backlinks']['domain_rating']
@@ -241,22 +219,65 @@ Response:
241
  return insights
242
 
243
  def _extract_priority_actions(self, recommendations: List[str]) -> List[Dict[str, str]]:
244
- """Extract priority actions from recommendations"""
245
  priority_actions = []
246
 
247
- for rec in recommendations:
248
- if '**HIGH**' in rec or '**CRITICAL**' in rec:
249
- # Extract action title and description
250
- parts = rec.replace('**HIGH**', '').replace('**CRITICAL**', '').strip()
251
- if ':' in parts:
252
- title, description = parts.split(':', 1)
253
- priority_actions.append({
254
- 'title': title.strip(),
255
- 'description': description.strip(),
256
- 'priority': 'HIGH'
257
- })

258
 
259
- # If no high priority actions found, take first 3
260
  if not priority_actions and recommendations:
261
  for i, rec in enumerate(recommendations[:3]):
262
  if ':' in rec:
@@ -267,15 +288,14 @@ Response:
267
  'priority': 'HIGH'
268
  })
269
 
270
- return priority_actions[:5] # Top 5 priority actions
271
 
272
  def _generate_fallback_recommendations(self, technical_data: Dict, content_data: Dict,
273
  keywords_data: Dict, backlinks_data: Dict, error: str = None) -> Dict[str, Any]:
274
- """Generate basic recommendations when LLM is not available"""
275
 
276
  recommendations = []
277
 
278
- # Technical recommendations
279
  mobile_score = technical_data.get('mobile_score', 0)
280
  desktop_score = technical_data.get('desktop_score', 0)
281
 
@@ -284,7 +304,7 @@ Response:
284
  if desktop_score < 50:
285
  recommendations.append("**HIGH** Improve Desktop Performance: Optimize server response time, minimize CSS and JavaScript")
286
 
287
- # Content recommendations
288
  pages = content_data.get('pages_analyzed', 0)
289
  if pages > 0:
290
  metadata = content_data.get('metadata_completeness', {})
@@ -294,7 +314,7 @@ Response:
294
  if content_data.get('avg_word_count', 0) < 300:
295
  recommendations.append("**MEDIUM** Enhance Content: Increase average page content length")
296
 
297
- # Keywords recommendations
298
  if not keywords_data.get('placeholder', False):
299
  total_keywords = keywords_data.get('total_keywords', 0)
300
  pos_dist = keywords_data.get('position_distribution', {})
@@ -304,7 +324,7 @@ Response:
304
  else:
305
  recommendations.append("**MEDIUM** Set Up Keyword Tracking: Connect Google Search Console for keyword insights")
306
 
307
- # Backlinks recommendations
308
  if not backlinks_data.get('placeholder', False):
309
  ref_domains = backlinks_data.get('total_ref_domains', 0)
310
  if ref_domains < 50:
@@ -312,7 +332,7 @@ Response:
312
  else:
313
  recommendations.append("**MEDIUM** Set Up Backlink Monitoring: Add RapidAPI key for comprehensive link analysis")
314
 
315
- # Default recommendations if none generated
316
  if not recommendations:
317
  recommendations = [
318
  "**HIGH** Audit Technical Issues: Review site speed and mobile performance",
@@ -329,8 +349,11 @@ Response:
329
  if error:
330
  insights.append(f"❌ LLM Error: {error}")
331
 
 
 
 
332
  return {
333
- 'recommendations': recommendations,
334
  'executive_insights': insights,
335
  'priority_actions': [
336
  {
 
1
+
 
 
 
2
 
3
  import os
4
  import json
 
6
  from groq import Groq
7
  from dotenv import load_dotenv
8
 
9
+
10
  load_dotenv()
11
 
12
 
 
22
  def generate_recommendations(self, url: str, technical_data: Dict[str, Any],
23
  content_data: Dict[str, Any], keywords_data: Dict[str, Any],
24
  backlinks_data: Dict[str, Any]) -> Dict[str, Any]:
 
25
  if not self.available:
26
  return self._generate_fallback_recommendations(technical_data, content_data, keywords_data, backlinks_data)
27
 
28
  try:
29
+
30
  context = self._prepare_context(url, technical_data, content_data, keywords_data, backlinks_data)
31
 
32
+
33
  recommendations = self._query_llm(context)
34
 
35
  return {
36
+ 'recommendations_markdown': recommendations,
37
  'executive_insights': self._generate_executive_insights(context),
38
+ 'priority_actions': self._extract_priority_actions([recommendations]),
39
  'data_source': 'Groq LLM Analysis',
40
  'generated_at': context['analysis_date']
41
  }
 
45
 
46
  def _prepare_context(self, url: str, technical_data: Dict, content_data: Dict,
47
  keywords_data: Dict, backlinks_data: Dict) -> Dict[str, Any]:
 
48
 
49
+
50
  context = {
51
  'website': url,
52
  'analysis_date': technical_data.get('last_updated', ''),
 
84
  return context
85
 
86
  def _query_llm(self, context: Dict[str, Any]) -> List[str]:
 
87
 
88
  prompt = f"""
89
  You are an expert SEO consultant analyzing a comprehensive SEO audit for {context['website']}. Based on the data below, provide specific, actionable SEO recommendations.
 
125
  5. Prioritize recommendations by potential impact and ease of implementation
126
  6. Include technical optimizations, content improvements, keyword opportunities, and link building strategies
127
  7. Provide estimated timelines and resources needed for each recommendation
128
+ 8. IMPORTANT: Use ONLY plain text format with markdown syntax - NO tables, NO complex formatting, NO HTML
129
+ 9. Format your response as clean markdown that can be rendered properly
130
 
131
+ Generate exactly 8-12 specific recommendations using simple markdown format:
132
+ ## Priority: HIGH/MEDIUM/LOW
133
+ **Action Title**
134
+ Description with clear steps and expected impact.
135
+ Timeline: X weeks
136
 
137
  Priority Levels: HIGH, MEDIUM, LOW
138
  Focus on actionable items that can be implemented within 30-90 days.
139
+ Use simple markdown formatting only - headers, bold text, and bullet points.
140
 
141
  Response:
142
  """
 
146
  messages=[
147
  {'role': 'user', 'content': prompt}
148
  ],
149
+ model="openai/gpt-oss-120b",
150
  stream=False,
151
+ temperature=0.1,
152
  max_tokens=1500
153
  )
154
 
155
  response = chat_completion.choices[0].message.content.strip()
156
 
157
+
158
+ # Return the full markdown response instead of parsing individual recommendations
159
+ return response
160
 
161
  except Exception as e:
162
  return [f"LLM Error: {str(e)}"]
163
 
164
  def _generate_executive_insights(self, context: Dict[str, Any]) -> List[str]:
 
165
  insights = []
166
 
167
+
168
  mobile_score = context['technical_seo']['mobile_score']
169
  desktop_score = context['technical_seo']['desktop_score']
170
  avg_score = (mobile_score + desktop_score) / 2
 
176
  else:
177
  insights.append(f"🟢 Good: Website performance is solid (avg: {avg_score:.0f}/100)")
178
 
179
+
180
  pages = context['content_audit']['pages_analyzed']
181
  if pages > 0:
182
  metadata = context['content_audit']['metadata_completeness']
 
187
  else:
188
  insights.append(f"🟢 Content Quality: Metadata completeness is good ({title_pct:.0f}%)")
189
 
190
+
191
  if context['keywords']['data_available']:
192
  total_keywords = context['keywords']['total_keywords']
193
  pos_dist = context['keywords']['position_distribution']
 
202
  else:
203
  insights.append("📊 Connect keyword tracking tools for visibility insights")
204
 
205
+
206
  if context['backlinks']['data_available']:
207
  ref_domains = context['backlinks']['total_ref_domains']
208
  domain_rating = context['backlinks']['domain_rating']
 
219
  return insights
220
 
221
  def _extract_priority_actions(self, recommendations: List[str]) -> List[Dict[str, str]]:
 
222
  priority_actions = []
223
 
224
+ # Handle the case where recommendations is a single string (markdown)
225
+ if isinstance(recommendations, list) and len(recommendations) == 1:
226
+ markdown_text = recommendations[0]
227
+ elif isinstance(recommendations, str):
228
+ markdown_text = recommendations
229
+ else:
230
+ markdown_text = ""
231
+
232
+ # Extract high priority actions from markdown
233
+ if markdown_text:
234
+ lines = markdown_text.split('\n')
235
+ current_priority = None
236
+ current_title = None
237
+ current_description = []
238
+
239
+ for line in lines:
240
+ line = line.strip()
241
+ if line.startswith('## Priority:'):
242
+ # Save previous action if exists
243
+ if current_title and current_priority == 'HIGH':
244
+ priority_actions.append({
245
+ 'title': current_title,
246
+ 'description': ' '.join(current_description).strip(),
247
+ 'priority': 'HIGH'
248
+ })
249
+
250
+ # Start new action
251
+ current_priority = line.replace('## Priority:', '').strip()
252
+ current_title = None
253
+ current_description = []
254
+ elif line.startswith('**') and line.endswith('**'):
255
+ current_title = line.replace('**', '').strip()
256
+ elif line and not line.startswith('#'):
257
+ current_description.append(line)
258
+
259
+ # Save last action if exists
260
+ if current_title and current_priority == 'HIGH':
261
+ priority_actions.append({
262
+ 'title': current_title,
263
+ 'description': ' '.join(current_description).strip(),
264
+ 'priority': 'HIGH'
265
+ })
266
+
267
+ # Fallback for old format
268
+ if not priority_actions and isinstance(recommendations, list):
269
+ for rec in recommendations:
270
+ if '**HIGH**' in rec or '**CRITICAL**' in rec:
271
+ parts = rec.replace('**HIGH**', '').replace('**CRITICAL**', '').strip()
272
+ if ':' in parts:
273
+ title, description = parts.split(':', 1)
274
+ priority_actions.append({
275
+ 'title': title.strip(),
276
+ 'description': description.strip(),
277
+ 'priority': 'HIGH'
278
+ })
279
 
280
+
281
  if not priority_actions and recommendations:
282
  for i, rec in enumerate(recommendations[:3]):
283
  if ':' in rec:
 
288
  'priority': 'HIGH'
289
  })
290
 
291
+ return priority_actions[:5]
292
 
293
  def _generate_fallback_recommendations(self, technical_data: Dict, content_data: Dict,
294
  keywords_data: Dict, backlinks_data: Dict, error: str = None) -> Dict[str, Any]:
 
295
 
296
  recommendations = []
297
 
298
+
299
  mobile_score = technical_data.get('mobile_score', 0)
300
  desktop_score = technical_data.get('desktop_score', 0)
301
 
 
304
  if desktop_score < 50:
305
  recommendations.append("**HIGH** Improve Desktop Performance: Optimize server response time, minimize CSS and JavaScript")
306
 
307
+
308
  pages = content_data.get('pages_analyzed', 0)
309
  if pages > 0:
310
  metadata = content_data.get('metadata_completeness', {})
 
314
  if content_data.get('avg_word_count', 0) < 300:
315
  recommendations.append("**MEDIUM** Enhance Content: Increase average page content length")
316
 
317
+
318
  if not keywords_data.get('placeholder', False):
319
  total_keywords = keywords_data.get('total_keywords', 0)
320
  pos_dist = keywords_data.get('position_distribution', {})
 
324
  else:
325
  recommendations.append("**MEDIUM** Set Up Keyword Tracking: Connect Google Search Console for keyword insights")
326
 
327
+
328
  if not backlinks_data.get('placeholder', False):
329
  ref_domains = backlinks_data.get('total_ref_domains', 0)
330
  if ref_domains < 50:
 
332
  else:
333
  recommendations.append("**MEDIUM** Set Up Backlink Monitoring: Add RapidAPI key for comprehensive link analysis")
334
 
335
+
336
  if not recommendations:
337
  recommendations = [
338
  "**HIGH** Audit Technical Issues: Review site speed and mobile performance",
 
349
  if error:
350
  insights.append(f"❌ LLM Error: {error}")
351
 
352
+ # Convert recommendations list to markdown format
353
+ markdown_recommendations = "\n".join([f"## Priority: HIGH\n**{rec.replace('**HIGH**', '').replace('**MEDIUM**', '').replace('**LOW**', '').strip()}**\n" for rec in recommendations])
354
+
355
  return {
356
+ 'recommendations_markdown': markdown_recommendations,
357
  'executive_insights': insights,
358
  'priority_actions': [
359
  {
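Illustration only (not part of this commit): with this change _query_llm returns a single markdown string and _extract_priority_actions walks its "## Priority:" sections. A small sketch with hypothetical recommendation text:

    # Hypothetical markdown shaped the way the updated prompt requests.
    sample = """## Priority: HIGH
    **Compress hero images**
    Serve modern formats and lazy-load below-the-fold media.
    Timeline: 2 weeks

    ## Priority: MEDIUM
    **Expand thin category pages**
    Add unique copy to each page.
    Timeline: 6 weeks
    """

    # Assumes LLMRecommendations() can be constructed without a GROQ_API_KEY.
    actions = LLMRecommendations()._extract_priority_actions([sample])
    # -> [{'title': 'Compress hero images',
    #      'description': 'Serve modern formats and lazy-load below-the-fold media. Timeline: 2 weeks',
    #      'priority': 'HIGH'}]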
modules/backlinks.py CHANGED
@@ -73,14 +73,12 @@ class BacklinksModule:
73
  )
74
 
75
  def _extract_domain(self, url: str) -> str:
76
- """Extract clean domain from URL"""
77
  if not url.startswith(('http://', 'https://')):
78
  url = 'https://' + url
79
  domain = urlparse(url).netloc.replace('www.', '')
80
  return domain
81
 
82
  def _api_request_with_retry(self, url: str, params: Dict = None, headers: Dict = None) -> Optional[Dict]:
83
- """Make API request with retry logic"""
84
  if headers is None:
85
  headers = self.headers.copy()
86
 
@@ -90,8 +88,8 @@ class BacklinksModule:
90
 
91
  if response.status_code == 200:
92
  return response.json()
93
- elif response.status_code == 429: # Rate limit
94
- wait_time = (attempt + 1) * 2 # Exponential backoff
95
  print(f"Rate limited, waiting {wait_time}s...")
96
  time.sleep(wait_time)
97
  continue
@@ -124,7 +122,7 @@ class BacklinksModule:
124
  # Limit results for quick scan
125
  if quick_scan:
126
  return data[:50]
127
- return data[:500] # Reasonable limit to avoid memory issues
128
 
129
  except Exception as e:
130
  print(f"Individual backlinks API error: {str(e)}")
@@ -132,7 +130,6 @@ class BacklinksModule:
132
  return []
133
 
134
  def _get_majestic_metrics(self, domain: str) -> Dict[str, Any]:
135
- """Get Majestic domain metrics via RapidAPI"""
136
  try:
137
  headers = self.headers.copy()
138
  headers['x-rapidapi-host'] = 'majestic1.p.rapidapi.com'
@@ -274,7 +271,7 @@ class BacklinksModule:
274
 
275
  # Sort by backlinks count and return top domains
276
  top_domains = sorted(domain_stats.values(), key=lambda x: x['backlinks'], reverse=True)
277
- return top_domains[:20] # Top 20 referring domains
278
 
279
  def _extract_anchor_distribution(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
280
  """Analyze anchor text distribution"""
@@ -282,7 +279,7 @@ class BacklinksModule:
282
 
283
  for link in backlinks:
284
  anchor = link.get('anchor', '').strip()
285
- if not anchor or len(anchor) > 100: # Skip very long anchors
286
  continue
287
 
288
  if anchor not in anchor_stats:
@@ -316,7 +313,7 @@ class BacklinksModule:
316
 
317
  # Sort by backlinks count
318
  anchor_distribution.sort(key=lambda x: x['backlinks'], reverse=True)
319
- return anchor_distribution[:15] # Top 15 anchor texts
320
 
321
  def _calculate_monthly_changes(self, backlinks: List[Dict]) -> Dict[str, int]:
322
  """Calculate monthly backlinks changes"""
@@ -335,14 +332,14 @@ class BacklinksModule:
335
  link_date = datetime.strptime(first_seen, '%Y-%m-%d')
336
  if link_date >= last_month:
337
  new_links += 1
338
- if link_date >= now - timedelta(days=90): # 3 months
339
  recent_links += 1
340
  except Exception:
341
  continue
342
 
343
  return {
344
  'new_backlinks': new_links,
345
- 'lost_backlinks': 0, # Can't calculate without historical data
346
  'net_change': new_links,
347
  'recent_backlinks_3m': recent_links
348
  }
@@ -384,9 +381,9 @@ class BacklinksModule:
384
 
385
  # Quality score (0-100)
386
  quality_score = min(100, (
387
- (follow_ratio * 0.4) + # 40% weight on follow ratio
388
- (avg_authority * 2) + # 40% weight on authority (scaled)
389
- (min(20, len(set(link.get('url_from', '').split('/')[2] for link in backlinks))) * 1) # 20% on domain diversity
390
  ))
391
 
392
  return {
@@ -398,7 +395,6 @@ class BacklinksModule:
398
  }
399
 
400
  def _get_data_sources(self, individual_backlinks: List, majestic_metrics: Dict, domain_metrics: Dict) -> List[str]:
401
- """Track which data sources provided information"""
402
  sources = []
403
 
404
  if individual_backlinks:
@@ -411,7 +407,6 @@ class BacklinksModule:
411
  return sources or ['No data sources available']
412
 
413
  def _generate_no_api_data(self, url: str) -> ModuleResult:
414
- """Generate response when no API key is available"""
415
  domain = self._extract_domain(url)
416
 
417
  no_api_data = {
 
73
  )
74
 
75
  def _extract_domain(self, url: str) -> str:
 
76
  if not url.startswith(('http://', 'https://')):
77
  url = 'https://' + url
78
  domain = urlparse(url).netloc.replace('www.', '')
79
  return domain
80
 
81
  def _api_request_with_retry(self, url: str, params: Dict = None, headers: Dict = None) -> Optional[Dict]:
 
82
  if headers is None:
83
  headers = self.headers.copy()
84
 
 
88
 
89
  if response.status_code == 200:
90
  return response.json()
91
+ elif response.status_code == 429:
92
+ wait_time = (attempt + 1) * 2
93
  print(f"Rate limited, waiting {wait_time}s...")
94
  time.sleep(wait_time)
95
  continue
 
122
  # Limit results for quick scan
123
  if quick_scan:
124
  return data[:50]
125
+ return data[:500]
126
 
127
  except Exception as e:
128
  print(f"Individual backlinks API error: {str(e)}")
 
130
  return []
131
 
132
  def _get_majestic_metrics(self, domain: str) -> Dict[str, Any]:
 
133
  try:
134
  headers = self.headers.copy()
135
  headers['x-rapidapi-host'] = 'majestic1.p.rapidapi.com'
 
271
 
272
  # Sort by backlinks count and return top domains
273
  top_domains = sorted(domain_stats.values(), key=lambda x: x['backlinks'], reverse=True)
274
+ return top_domains[:20]
275
 
276
  def _extract_anchor_distribution(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
277
  """Analyze anchor text distribution"""
 
279
 
280
  for link in backlinks:
281
  anchor = link.get('anchor', '').strip()
282
+ if not anchor or len(anchor) > 100:
283
  continue
284
 
285
  if anchor not in anchor_stats:
 
313
 
314
  # Sort by backlinks count
315
  anchor_distribution.sort(key=lambda x: x['backlinks'], reverse=True)
316
+ return anchor_distribution[:15]
317
 
318
  def _calculate_monthly_changes(self, backlinks: List[Dict]) -> Dict[str, int]:
319
  """Calculate monthly backlinks changes"""
 
332
  link_date = datetime.strptime(first_seen, '%Y-%m-%d')
333
  if link_date >= last_month:
334
  new_links += 1
335
+ if link_date >= now - timedelta(days=90):
336
  recent_links += 1
337
  except Exception:
338
  continue
339
 
340
  return {
341
  'new_backlinks': new_links,
342
+ 'lost_backlinks': 0,
343
  'net_change': new_links,
344
  'recent_backlinks_3m': recent_links
345
  }
 
381
 
382
  # Quality score (0-100)
383
  quality_score = min(100, (
384
+ (follow_ratio * 0.4) +
385
+ (avg_authority * 2) +
386
+ (min(20, len(set(link.get('url_from', '').split('/')[2] for link in backlinks))) * 1)
387
  ))
388
 
389
  return {
 
395
  }
396
 
397
  def _get_data_sources(self, individual_backlinks: List, majestic_metrics: Dict, domain_metrics: Dict) -> List[str]:
 
398
  sources = []
399
 
400
  if individual_backlinks:
 
407
  return sources or ['No data sources available']
408
 
409
  def _generate_no_api_data(self, url: str) -> ModuleResult:
 
410
  domain = self._extract_domain(url)
411
 
412
  no_api_data = {
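Illustration only (not part of this commit): the quality score above combines the three terms whose intent was described in the comments removed here (follow ratio, source authority, domain diversity). A worked example with hypothetical inputs:

    follow_ratio = 72.0    # hypothetical: percent of follow links
    avg_authority = 12.5   # hypothetical: mean authority of linking pages
    unique_domains = 35    # hypothetical: distinct linking domains

    quality_score = min(100, (follow_ratio * 0.4) + (avg_authority * 2) + (min(20, unique_domains) * 1))
    # 28.8 + 25.0 + 20 = 73.8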
modules/content_audit.py CHANGED
@@ -59,7 +59,6 @@ class ContentAuditModule:
59
  return self._get_fallback_data(url, str(e))
60
 
61
  def _get_sitemap_urls(self, base_url: str, limit: int = 200) -> List[str]:
62
- """Extract URLs from sitemap.xml"""
63
  urls = []
64
 
65
  # Common sitemap locations
@@ -81,7 +80,6 @@ class ContentAuditModule:
81
  return urls[:limit]
82
 
83
  def _parse_sitemap(self, sitemap_content: bytes, base_url: str, limit: int) -> List[str]:
84
- """Parse sitemap XML content"""
85
  urls = []
86
 
87
  try:
@@ -117,7 +115,6 @@ class ContentAuditModule:
117
  return urls[:limit]
118
 
119
  def _crawl_from_homepage(self, base_url: str, limit: int = 50) -> List[str]:
120
- """Crawl URLs starting from homepage"""
121
  urls = set([base_url])
122
  processed = set()
123
 
@@ -143,7 +140,6 @@ class ContentAuditModule:
143
  return list(urls)[:limit]
144
 
145
  def _analyze_page(self, url: str) -> Dict[str, Any]:
146
- """Analyze a single page"""
147
  try:
148
  response = self.session.get(url, timeout=15)
149
  if response.status_code != 200:
@@ -208,7 +204,6 @@ class ContentAuditModule:
208
  return soup.get_text()
209
 
210
  def _detect_cta(self, soup: BeautifulSoup) -> bool:
211
- """Detect presence of call-to-action elements"""
212
  text_content = soup.get_text().lower()
213
 
214
  for keyword in self.cta_keywords:
@@ -225,7 +220,6 @@ class ContentAuditModule:
225
  return False
226
 
227
  def _get_last_modified(self, headers: Dict, soup: BeautifulSoup) -> str:
228
- """Get last modified date from headers or meta tags"""
229
  # Check headers first
230
  if 'last-modified' in headers:
231
  return headers['last-modified']
@@ -240,7 +234,6 @@ class ContentAuditModule:
240
  return ""
241
 
242
  def _is_valid_content_url(self, url: str) -> bool:
243
- """Check if URL is valid for content analysis"""
244
  if not url:
245
  return False
246
 
@@ -261,7 +254,6 @@ class ContentAuditModule:
261
  return True
262
 
263
  def _is_same_domain(self, url1: str, url2: str) -> bool:
264
- """Check if two URLs are from the same domain"""
265
  try:
266
  domain1 = urlparse(url1).netloc
267
  domain2 = urlparse(url2).netloc
@@ -270,7 +262,6 @@ class ContentAuditModule:
270
  return False
271
 
272
  def _calculate_metrics(self, base_url: str, pages_data: List[Dict], quick_scan: bool) -> Dict[str, Any]:
273
- """Calculate aggregate metrics from page data"""
274
  total_pages = len(pages_data)
275
  valid_pages = [p for p in pages_data if 'error' not in p]
276
 
@@ -318,7 +309,6 @@ class ContentAuditModule:
318
  }
319
 
320
  def _analyze_content_freshness(self, pages_data: List[Dict]) -> Dict[str, Any]:
321
- """Analyze content freshness based on last modified dates"""
322
  now = datetime.now()
323
  six_months_ago = now - timedelta(days=180)
324
  eighteen_months_ago = now - timedelta(days=540)
@@ -361,7 +351,6 @@ class ContentAuditModule:
361
  }
362
 
363
  def _get_fallback_data(self, url: str, error: str) -> Dict[str, Any]:
364
- """Return fallback data when analysis fails"""
365
  return {
366
  'url': url,
367
  'error': f"Content audit failed: {error}",
 
59
  return self._get_fallback_data(url, str(e))
60
 
61
  def _get_sitemap_urls(self, base_url: str, limit: int = 200) -> List[str]:
 
62
  urls = []
63
 
64
  # Common sitemap locations
 
80
  return urls[:limit]
81
 
82
  def _parse_sitemap(self, sitemap_content: bytes, base_url: str, limit: int) -> List[str]:
 
83
  urls = []
84
 
85
  try:
 
115
  return urls[:limit]
116
 
117
  def _crawl_from_homepage(self, base_url: str, limit: int = 50) -> List[str]:
 
118
  urls = set([base_url])
119
  processed = set()
120
 
 
140
  return list(urls)[:limit]
141
 
142
  def _analyze_page(self, url: str) -> Dict[str, Any]:
 
143
  try:
144
  response = self.session.get(url, timeout=15)
145
  if response.status_code != 200:
 
204
  return soup.get_text()
205
 
206
  def _detect_cta(self, soup: BeautifulSoup) -> bool:
 
207
  text_content = soup.get_text().lower()
208
 
209
  for keyword in self.cta_keywords:
 
220
  return False
221
 
222
  def _get_last_modified(self, headers: Dict, soup: BeautifulSoup) -> str:
 
223
  # Check headers first
224
  if 'last-modified' in headers:
225
  return headers['last-modified']
 
234
  return ""
235
 
236
  def _is_valid_content_url(self, url: str) -> bool:
 
237
  if not url:
238
  return False
239
 
 
254
  return True
255
 
256
  def _is_same_domain(self, url1: str, url2: str) -> bool:
 
257
  try:
258
  domain1 = urlparse(url1).netloc
259
  domain2 = urlparse(url2).netloc
 
262
  return False
263
 
264
  def _calculate_metrics(self, base_url: str, pages_data: List[Dict], quick_scan: bool) -> Dict[str, Any]:
 
265
  total_pages = len(pages_data)
266
  valid_pages = [p for p in pages_data if 'error' not in p]
267
 
 
309
  }
310
 
311
  def _analyze_content_freshness(self, pages_data: List[Dict]) -> Dict[str, Any]:
 
312
  now = datetime.now()
313
  six_months_ago = now - timedelta(days=180)
314
  eighteen_months_ago = now - timedelta(days=540)
 
351
  }
352
 
353
  def _get_fallback_data(self, url: str, error: str) -> Dict[str, Any]:
 
354
  return {
355
  'url': url,
356
  'error': f"Content audit failed: {error}",
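Illustration only (not part of this commit): a minimal sketch of the sitemap-first URL discovery that _get_sitemap_urls performs, assuming the common /sitemap.xml location (the module also falls back to crawling from the homepage):

    import requests
    from urllib.parse import urljoin
    from xml.etree import ElementTree

    def sitemap_urls(base_url: str, limit: int = 200) -> list:
        resp = requests.get(urljoin(base_url, '/sitemap.xml'), timeout=15)
        if resp.status_code != 200:
            return []
        root = ElementTree.fromstring(resp.content)
        # <loc> elements carry the page URLs; ignore the namespace prefix.
        locs = [el.text.strip() for el in root.iter() if el.tag.endswith('loc') and el.text]
        return locs[:limit]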
modules/keywords.py CHANGED
@@ -118,13 +118,11 @@ class KeywordsModule:
118
  )
119
 
120
  def _extract_domain(self, url: str) -> str:
121
- """Extract domain from URL"""
122
  if not url.startswith(('http://', 'https://')):
123
  url = 'https://' + url
124
  return urlparse(url).netloc.replace('www.', '')
125
 
126
  def _fetch_domain_keywords(self, domain: str, quick_scan: bool) -> Dict[str, Any]:
127
- """Fetch keywords data for a domain using Competitors Ranking Keywords API"""
128
  try:
129
  all_keywords = []
130
  offset = 0
@@ -187,7 +185,6 @@ class KeywordsModule:
187
  return {'success': False, 'error': str(e)}
188
 
189
  def _calculate_domain_statistics(self, keywords: List[Dict]) -> Dict[str, Any]:
190
- """Calculate domain statistics from keywords data"""
191
  total_keywords = len(keywords)
192
 
193
  # Position distribution
@@ -221,7 +218,6 @@ class KeywordsModule:
221
 
222
  def _process_keywords_data(self, main_data: Dict, competitor_data: Dict,
223
  domain: str, competitor_domains: List[str]) -> Dict[str, Any]:
224
- """Process and structure the keywords data"""
225
  stats = main_data['statistics']['organic']
226
  keywords = main_data['keywords']
227
 
@@ -288,7 +284,6 @@ class KeywordsModule:
288
  }
289
 
290
  def _identify_best_keywords(self, keywords: List[Dict]) -> List[Dict]:
291
- """Identify best performing keywords"""
292
  best_candidates = [
293
  k for k in keywords
294
  if k.get('rank', 100) <= 3 and k.get('estimated_traffic_volume', 0) > 10
@@ -310,7 +305,6 @@ class KeywordsModule:
310
  ]
311
 
312
  def _identify_declining_keywords(self, keywords: List[Dict]) -> List[Dict]:
313
- """Identify keywords with declining performance"""
314
  declining_candidates = []
315
 
316
  for k in keywords:
@@ -333,7 +327,6 @@ class KeywordsModule:
333
 
334
  def _analyze_competitor_gaps(self, main_keywords: List[Dict], competitor_data: Dict,
335
  domain: str, competitor_domains: List[str]) -> Tuple[List[Dict], List[Dict]]:
336
- """Analyze competitor gaps and opportunities"""
337
  opportunities = []
338
  competitor_summary = []
339
 
@@ -385,10 +378,9 @@ class KeywordsModule:
385
  # Sort all opportunities by priority score
386
  opportunities.sort(key=lambda x: x['priority_score'], reverse=True)
387
 
388
- return opportunities[:50], competitor_summary # Top 50 overall
389
 
390
  def _calculate_opportunity_score(self, competitor_rank: int, search_volume: int, difficulty: int) -> float:
391
- """Calculate opportunity score using the PRD algorithm"""
392
  position_ctr = {1: 28, 2: 15, 3: 11, 4: 8, 5: 7, 10: 2, 20: 1}
393
 
394
  # Find closest CTR value
@@ -406,7 +398,6 @@ class KeywordsModule:
406
  return min(round(score, 1), 100)
407
 
408
  def _estimate_difficulty(self, rank: int, volume: int) -> int:
409
- """Estimate keyword difficulty based on rank and volume"""
410
  # Simple heuristic - in practice, this would come from a keyword difficulty API
411
  if rank <= 3:
412
  return 20 + (volume // 1000) * 5
@@ -416,7 +407,6 @@ class KeywordsModule:
416
  return 50 + (volume // 1000) * 2
417
 
418
  def _enrich_keywords_data(self, keywords: List[Dict]) -> List[Dict]:
419
- """Enrich keywords with volume and CPC data"""
420
  # Identify keywords needing enrichment
421
  keywords_to_enrich = [
422
  k for k in keywords
@@ -445,7 +435,6 @@ class KeywordsModule:
445
  return enriched_keywords
446
 
447
  def _batch_enrich_keywords(self, keywords: List[str]) -> Dict[str, Dict]:
448
- """Batch enrich keywords using Google Keyword Insight API"""
449
  enriched_data = {}
450
 
451
  # Process in batches
@@ -518,17 +507,14 @@ class KeywordsModule:
518
  return enriched_data
519
 
520
  def _get_cache_key(self, keyword: str) -> str:
521
- """Generate cache key for keyword"""
522
  return hashlib.md5(keyword.lower().encode()).hexdigest()
523
 
524
  def _calculate_enrichment_rate(self, keywords: List[Dict]) -> float:
525
- """Calculate the percentage of keywords with volume data"""
526
  enriched = sum(1 for k in keywords if k.get('avg_search_volume', 0) > 0)
527
  total = len(keywords)
528
  return round(enriched / total * 100, 1) if total > 0 else 0
529
 
530
  def _determine_trend(self, keyword_data: Dict) -> str:
531
- """Determine keyword trend based on rank changes"""
532
  current_rank = keyword_data.get('rank', 100)
533
  previous_rank = keyword_data.get('previous_rank', 100)
534
 
@@ -542,13 +528,11 @@ class KeywordsModule:
542
  return 'stable'
543
 
544
  def _rate_limit_primary_api(self):
545
- """Rate limiting for primary API (60 requests/minute)"""
546
  current_time = time.time()
547
- if current_time - self.last_primary_call < 1: # 1 second between calls
548
  time.sleep(1)
549
 
550
  def _rate_limit_enrichment_api(self):
551
- """Rate limiting for enrichment API (100 requests/minute)"""
552
  current_time = time.time()
553
- if current_time - self.last_enrichment_call < 0.6: # 0.6 seconds between calls
554
  time.sleep(0.6)
 
118
  )
119
 
120
  def _extract_domain(self, url: str) -> str:
 
121
  if not url.startswith(('http://', 'https://')):
122
  url = 'https://' + url
123
  return urlparse(url).netloc.replace('www.', '')
124
 
125
  def _fetch_domain_keywords(self, domain: str, quick_scan: bool) -> Dict[str, Any]:
 
126
  try:
127
  all_keywords = []
128
  offset = 0
 
185
  return {'success': False, 'error': str(e)}
186
 
187
  def _calculate_domain_statistics(self, keywords: List[Dict]) -> Dict[str, Any]:
 
188
  total_keywords = len(keywords)
189
 
190
  # Position distribution
 
218
 
219
  def _process_keywords_data(self, main_data: Dict, competitor_data: Dict,
220
  domain: str, competitor_domains: List[str]) -> Dict[str, Any]:
 
221
  stats = main_data['statistics']['organic']
222
  keywords = main_data['keywords']
223
 
 
284
  }
285
 
286
  def _identify_best_keywords(self, keywords: List[Dict]) -> List[Dict]:
 
287
  best_candidates = [
288
  k for k in keywords
289
  if k.get('rank', 100) <= 3 and k.get('estimated_traffic_volume', 0) > 10
 
305
  ]
306
 
307
  def _identify_declining_keywords(self, keywords: List[Dict]) -> List[Dict]:
 
308
  declining_candidates = []
309
 
310
  for k in keywords:
 
327
 
328
  def _analyze_competitor_gaps(self, main_keywords: List[Dict], competitor_data: Dict,
329
  domain: str, competitor_domains: List[str]) -> Tuple[List[Dict], List[Dict]]:
 
330
  opportunities = []
331
  competitor_summary = []
332
 
 
378
  # Sort all opportunities by priority score
379
  opportunities.sort(key=lambda x: x['priority_score'], reverse=True)
380
 
381
+ return opportunities[:50], competitor_summary
382
 
383
  def _calculate_opportunity_score(self, competitor_rank: int, search_volume: int, difficulty: int) -> float:
 
384
  position_ctr = {1: 28, 2: 15, 3: 11, 4: 8, 5: 7, 10: 2, 20: 1}
385
 
386
  # Find closest CTR value
 
398
  return min(round(score, 1), 100)
399
 
400
  def _estimate_difficulty(self, rank: int, volume: int) -> int:
 
401
  # Simple heuristic - in practice, this would come from a keyword difficulty API
402
  if rank <= 3:
403
  return 20 + (volume // 1000) * 5
 
407
  return 50 + (volume // 1000) * 2
408
 
409
  def _enrich_keywords_data(self, keywords: List[Dict]) -> List[Dict]:
 
410
  # Identify keywords needing enrichment
411
  keywords_to_enrich = [
412
  k for k in keywords
 
435
  return enriched_keywords
436
 
437
  def _batch_enrich_keywords(self, keywords: List[str]) -> Dict[str, Dict]:
 
438
  enriched_data = {}
439
 
440
  # Process in batches
 
507
  return enriched_data
508
 
509
  def _get_cache_key(self, keyword: str) -> str:
 
510
  return hashlib.md5(keyword.lower().encode()).hexdigest()
511
 
512
  def _calculate_enrichment_rate(self, keywords: List[Dict]) -> float:
 
513
  enriched = sum(1 for k in keywords if k.get('avg_search_volume', 0) > 0)
514
  total = len(keywords)
515
  return round(enriched / total * 100, 1) if total > 0 else 0
516
 
517
  def _determine_trend(self, keyword_data: Dict) -> str:
 
518
  current_rank = keyword_data.get('rank', 100)
519
  previous_rank = keyword_data.get('previous_rank', 100)
520
 
 
528
  return 'stable'
529
 
530
  def _rate_limit_primary_api(self):
 
531
  current_time = time.time()
532
+ if current_time - self.last_primary_call < 1:
533
  time.sleep(1)
534
 
535
  def _rate_limit_enrichment_api(self):
 
536
  current_time = time.time()
537
+ if current_time - self.last_enrichment_call < 0.6:
538
  time.sleep(0.6)
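Illustration only (not part of this commit): the two rate-limit helpers above space calls at roughly 1s and 0.6s, matching the limits stated in the removed docstrings (about 60 and 100 requests per minute). A self-contained sketch of that pacing:

    import time

    class CallSpacer:
        # Hypothetical helper, not in the module: enforces a minimum interval between calls.
        def __init__(self, min_interval: float):
            self.min_interval = min_interval
            self.last_call = 0.0

        def wait(self):
            elapsed = time.time() - self.last_call
            if elapsed < self.min_interval:
                time.sleep(self.min_interval - elapsed)
            self.last_call = time.time()

    primary = CallSpacer(1.0)      # ~60 requests/minute
    enrichment = CallSpacer(0.6)   # ~100 requests/minute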
modules/technical_seo.py CHANGED
@@ -4,12 +4,6 @@ from typing import Dict, Any, Optional
4
 
5
  class TechnicalSEOModule:
6
  def __init__(self, api_key: Optional[str] = None):
7
- """
8
- Initialize Technical SEO module
9
-
10
- Args:
11
- api_key: Google PageSpeed Insights API key (optional for basic usage)
12
- """
13
  self.api_key = api_key
14
  self.base_url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
15
 
@@ -45,7 +39,6 @@ class TechnicalSEOModule:
45
  return self._get_fallback_data(url, str(e))
46
 
47
  def _get_pagespeed_data(self, url: str, strategy: str) -> Dict[str, Any]:
48
- """Get PageSpeed Insights data for URL and strategy"""
49
  params = {
50
  'url': url,
51
  'strategy': strategy,
@@ -64,7 +57,6 @@ class TechnicalSEOModule:
64
  raise
65
 
66
  def _extract_metrics(self, data: Dict[str, Any], strategy: str) -> Dict[str, Any]:
67
- """Extract key performance metrics from PageSpeed data"""
68
  lighthouse_result = data.get('lighthouseResult', {})
69
  categories = lighthouse_result.get('categories', {})
70
  audits = lighthouse_result.get('audits', {})
@@ -91,7 +83,6 @@ class TechnicalSEOModule:
91
  }
92
 
93
  def _extract_core_web_vitals(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
94
- """Extract Core Web Vitals metrics"""
95
  def get_metric_value(data, metric_key):
96
  audits = data.get('lighthouseResult', {}).get('audits', {})
97
  metric = audits.get(metric_key, {})
@@ -116,7 +107,6 @@ class TechnicalSEOModule:
116
  }
117
 
118
  def _extract_opportunities(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
119
- """Extract optimization opportunities"""
120
  mobile_audits = mobile_data.get('lighthouseResult', {}).get('audits', {})
121
 
122
  opportunities = []
@@ -128,7 +118,7 @@ class TechnicalSEOModule:
128
 
129
  for key in opportunity_keys:
130
  audit = mobile_audits.get(key, {})
131
- if audit.get('score', 1) < 0.9: # Only include if score is low
132
  opportunities.append({
133
  'id': key,
134
  'title': audit.get('title', key.replace('-', ' ').title()),
@@ -137,10 +127,9 @@ class TechnicalSEOModule:
137
  'potential_savings': audit.get('details', {}).get('overallSavingsMs', 0)
138
  })
139
 
140
- return {'opportunities': opportunities[:5]} # Top 5 opportunities
141
 
142
  def _extract_diagnostics(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
143
- """Extract diagnostic information"""
144
  mobile_audits = mobile_data.get('lighthouseResult', {}).get('audits', {})
145
 
146
  diagnostics = []
@@ -162,7 +151,6 @@ class TechnicalSEOModule:
162
  return {'diagnostics': diagnostics}
163
 
164
  def _get_fallback_data(self, url: str, error: str) -> Dict[str, Any]:
165
- """Return fallback data when API fails"""
166
  return {
167
  'url': url,
168
  'error': f"PageSpeed API unavailable: {error}",
 
4
 
5
  class TechnicalSEOModule:
6
  def __init__(self, api_key: Optional[str] = None):
 
7
  self.api_key = api_key
8
  self.base_url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
9
 
 
39
  return self._get_fallback_data(url, str(e))
40
 
41
  def _get_pagespeed_data(self, url: str, strategy: str) -> Dict[str, Any]:
 
42
  params = {
43
  'url': url,
44
  'strategy': strategy,
 
57
  raise
58
 
59
  def _extract_metrics(self, data: Dict[str, Any], strategy: str) -> Dict[str, Any]:
 
60
  lighthouse_result = data.get('lighthouseResult', {})
61
  categories = lighthouse_result.get('categories', {})
62
  audits = lighthouse_result.get('audits', {})
 
83
  }
84
 
85
  def _extract_core_web_vitals(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
 
86
  def get_metric_value(data, metric_key):
87
  audits = data.get('lighthouseResult', {}).get('audits', {})
88
  metric = audits.get(metric_key, {})
 
107
  }
108
 
109
  def _extract_opportunities(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
 
110
  mobile_audits = mobile_data.get('lighthouseResult', {}).get('audits', {})
111
 
112
  opportunities = []
 
118
 
119
  for key in opportunity_keys:
120
  audit = mobile_audits.get(key, {})
121
+ if audit.get('score', 1) < 0.9:
122
  opportunities.append({
123
  'id': key,
124
  'title': audit.get('title', key.replace('-', ' ').title()),
 
127
  'potential_savings': audit.get('details', {}).get('overallSavingsMs', 0)
128
  })
129
 
130
+ return {'opportunities': opportunities[:5]}
131
 
132
  def _extract_diagnostics(self, mobile_data: Dict[str, Any], desktop_data: Dict[str, Any]) -> Dict[str, Any]:
 
133
  mobile_audits = mobile_data.get('lighthouseResult', {}).get('audits', {})
134
 
135
  diagnostics = []
 
151
  return {'diagnostics': diagnostics}
152
 
153
  def _get_fallback_data(self, url: str, error: str) -> Dict[str, Any]:
 
154
  return {
155
  'url': url,
156
  'error': f"PageSpeed API unavailable: {error}",
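Illustration only (not part of this commit): a minimal sketch of the PageSpeed Insights call that _get_pagespeed_data wraps, using the endpoint and parameters shown above (the API key is optional and mainly lifts quota limits):

    import requests

    def fetch_pagespeed(url: str, strategy: str = 'mobile', api_key: str = None) -> dict:
        params = {'url': url, 'strategy': strategy}
        if api_key:
            params['key'] = api_key  # optional
        resp = requests.get(
            'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
            params=params, timeout=60,
        )
        resp.raise_for_status()
        return resp.json()

    # Lighthouse reports the performance category score on a 0-1 scale; the module's
    # mobile_score / desktop_score are on 0-100, so scale it here the same way.
    data = fetch_pagespeed('https://example.com')
    perf = data.get('lighthouseResult', {}).get('categories', {}).get('performance', {}).get('score', 0) * 100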
report_generator.py CHANGED
@@ -5,11 +5,45 @@ import plotly.graph_objects as go
5
  import plotly.express as px
6
  from plotly.offline import plot
7
  import plotly
 
8
 
9
  class ReportGenerator:
10
  def __init__(self):
11
  self.report_template = self._get_report_template()
12
 
 
13
  def generate_html_report(self, url: str, technical_data: Dict[str, Any],
14
  content_data: Dict[str, Any], competitor_data: List[Dict] = None,
15
  keywords_data: Dict[str, Any] = None, backlinks_data: Dict[str, Any] = None,
@@ -44,8 +78,7 @@ class ReportGenerator:
44
  if competitor_data:
45
  competitor_section = self._generate_competitor_section(competitor_data, technical_data, content_data)
46
 
47
- # Generate placeholder sections
48
- placeholder_sections = self._generate_placeholder_sections()
49
 
50
  # Generate recommendations
51
  recommendations = self._generate_recommendations(technical_data, content_data)
@@ -61,7 +94,7 @@ class ReportGenerator:
61
  keywords_section=keywords_section,
62
  backlinks_section=backlinks_section,
63
  competitor_section=competitor_section,
64
- placeholder_sections=placeholder_sections,
65
  recommendations=recommendations,
66
  llm_recommendations=recommendations_section
67
  )
@@ -538,50 +571,7 @@ class ReportGenerator:
538
 
539
  return comparison_html
540
 
541
- def _generate_placeholder_sections(self) -> str:
542
- """Generate placeholder sections for future modules"""
543
- return """
544
- <div class="placeholder-sections">
545
- <div class="placeholder-section">
546
- <h3>🔍 Keyword Rankings</h3>
547
- <div class="placeholder-content">
548
- <p><em>Coming in future versions</em></p>
549
- <ul>
550
- <li>Google Search Console integration</li>
551
- <li>Keyword ranking positions</li>
552
- <li>Search volume analysis</li>
553
- <li>Keyword opportunities</li>
554
- </ul>
555
- </div>
556
- </div>
557
-
558
- <div class="placeholder-section">
559
- <h3>🔗 Backlink Profile</h3>
560
- <div class="placeholder-content">
561
- <p><em>Coming in future versions</em></p>
562
- <ul>
563
- <li>Total backlinks and referring domains</li>
564
- <li>Domain authority metrics</li>
565
- <li>Anchor text analysis</li>
566
- <li>Link acquisition opportunities</li>
567
- </ul>
568
- </div>
569
- </div>
570
-
571
- <div class="placeholder-section">
572
- <h3>📈 Conversion Tracking</h3>
573
- <div class="placeholder-content">
574
- <p><em>Coming in future versions</em></p>
575
- <ul>
576
- <li>Google Analytics integration</li>
577
- <li>Organic traffic conversion rates</li>
578
- <li>Goal completion tracking</li>
579
- <li>Revenue attribution</li>
580
- </ul>
581
- </div>
582
- </div>
583
- </div>
584
- """
585
 
586
  def _generate_recommendations(self, technical_data: Dict[str, Any], content_data: Dict[str, Any]) -> str:
587
  """Generate prioritized recommendations"""
@@ -830,11 +820,11 @@ class ReportGenerator:
830
  """
831
 
832
  def _generate_recommendations_section(self, llm_recommendations: Dict[str, Any]) -> str:
833
- """Generate LLM-powered recommendations section"""
834
  if not llm_recommendations:
835
  return ""
836
 
837
- recommendations = llm_recommendations.get('recommendations', [])
838
  executive_insights = llm_recommendations.get('executive_insights', [])
839
  priority_actions = llm_recommendations.get('priority_actions', [])
840
 
@@ -861,12 +851,17 @@ class ReportGenerator:
861
  """
862
  priority_html += "</div>"
863
 
 
864
  recommendations_html = ""
865
- if recommendations:
866
- recommendations_html = "<div class='llm-recommendations'><h4>🤖 AI-Generated Recommendations</h4><ul>"
867
- for rec in recommendations:
868
- recommendations_html += f"<li>{rec}</li>"
869
- recommendations_html += "</ul></div>"
 
 
 
 
870
 
871
  return f"""
872
  <div class="card">
@@ -1258,6 +1253,56 @@ class ReportGenerator:
1258
  text-align: center;
1259
  }}
1260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1261
  @media (max-width: 768px) {{
1262
  .report-container {{
1263
  padding: 10px;
 
5
  import plotly.express as px
6
  from plotly.offline import plot
7
  import plotly
8
+ import re
9
 
10
  class ReportGenerator:
11
  def __init__(self):
12
  self.report_template = self._get_report_template()
13
 
14
+ def _markdown_to_html(self, markdown_text: str) -> str:
15
+ """Convert simple markdown to HTML"""
16
+ if not markdown_text:
17
+ return ""
18
+
19
+ html = markdown_text
20
+
21
+ # Convert headers
22
+ html = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', html, flags=re.MULTILINE)
23
+ html = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', html, flags=re.MULTILINE)
24
+ html = re.sub(r'^# (.*?)$', r'<h1>\1</h1>', html, flags=re.MULTILINE)
25
+
26
+ # Convert bold text
27
+ html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', html)
28
+
29
+ # Convert bullet points
30
+ html = re.sub(r'^- (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
31
+ html = re.sub(r'^• (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
32
+
33
+ # Wrap consecutive <li> tags in <ul>
34
+ html = re.sub(r'(<li>.*?</li>(?:\s*<li>.*?</li>)*)', r'<ul>\1</ul>', html, flags=re.DOTALL)
35
+
36
+ # Convert line breaks to <br> tags
37
+ html = html.replace('\n', '<br>')
38
+
39
+ # Clean up extra <br> tags around block elements
40
+ html = re.sub(r'<br>\s*(<h[1-6]>)', r'\1', html)
41
+ html = re.sub(r'(</h[1-6]>)\s*<br>', r'\1', html)
42
+ html = re.sub(r'<br>\s*(<ul>)', r'\1', html)
43
+ html = re.sub(r'(</ul>)\s*<br>', r'\1', html)
44
+
45
+ return html
46
+
47
  def generate_html_report(self, url: str, technical_data: Dict[str, Any],
48
  content_data: Dict[str, Any], competitor_data: List[Dict] = None,
49
  keywords_data: Dict[str, Any] = None, backlinks_data: Dict[str, Any] = None,
 
78
  if competitor_data:
79
  competitor_section = self._generate_competitor_section(competitor_data, technical_data, content_data)
80
 
81
+
 
82
 
83
  # Generate recommendations
84
  recommendations = self._generate_recommendations(technical_data, content_data)
 
94
  keywords_section=keywords_section,
95
  backlinks_section=backlinks_section,
96
  competitor_section=competitor_section,
97
+
98
  recommendations=recommendations,
99
  llm_recommendations=recommendations_section
100
  )
 
571
 
572
  return comparison_html
573
 
574
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
575
 
576
  def _generate_recommendations(self, technical_data: Dict[str, Any], content_data: Dict[str, Any]) -> str:
577
  """Generate prioritized recommendations"""
 
820
  """
821
 
822
  def _generate_recommendations_section(self, llm_recommendations: Dict[str, Any]) -> str:
823
+ """Generate LLM-powered recommendations section with markdown rendering"""
824
  if not llm_recommendations:
825
  return ""
826
 
827
+ recommendations_markdown = llm_recommendations.get('recommendations_markdown', '')
828
  executive_insights = llm_recommendations.get('executive_insights', [])
829
  priority_actions = llm_recommendations.get('priority_actions', [])
830
 
 
851
  """
852
  priority_html += "</div>"
853
 
854
+ # Convert markdown recommendations to HTML
855
  recommendations_html = ""
856
+ if recommendations_markdown:
857
+ recommendations_html = f"""
858
+ <div class='llm-recommendations'>
859
+ <h4>🤖 AI-Generated Recommendations</h4>
860
+ <div class="markdown-content">
861
+ {self._markdown_to_html(recommendations_markdown)}
862
+ </div>
863
+ </div>
864
+ """
865
 
866
  return f"""
867
  <div class="card">
 
1253
  text-align: center;
1254
  }}
1255
 
1256
+ .markdown-content {{
1257
+ line-height: 1.6;
1258
+ color: #2c3e50;
1259
+ }}
1260
+
1261
+ .markdown-content h1 {{
1262
+ color: #2c3e50;
1263
+ border-bottom: 2px solid #3498db;
1264
+ padding-bottom: 10px;
1265
+ margin-top: 30px;
1266
+ margin-bottom: 20px;
1267
+ }}
1268
+
1269
+ .markdown-content h2 {{
1270
+ color: #34495e;
1271
+ margin-top: 25px;
1272
+ margin-bottom: 15px;
1273
+ font-size: 1.3em;
1274
+ }}
1275
+
1276
+ .markdown-content h3 {{
1277
+ color: #34495e;
1278
+ margin-top: 20px;
1279
+ margin-bottom: 10px;
1280
+ font-size: 1.1em;
1281
+ }}
1282
+
1283
+ .markdown-content strong {{
1284
+ color: #2c3e50;
1285
+ font-weight: 600;
1286
+ }}
1287
+
1288
+ .markdown-content ul {{
1289
+ margin: 15px 0;
1290
+ padding-left: 20px;
1291
+ }}
1292
+
1293
+ .markdown-content li {{
1294
+ margin-bottom: 8px;
1295
+ line-height: 1.5;
1296
+ }}
1297
+
1298
+ .llm-recommendations {{
1299
+ background: #f8f9fa;
1300
+ border-left: 4px solid #3498db;
1301
+ padding: 20px;
1302
+ margin: 20px 0;
1303
+ border-radius: 0 8px 8px 0;
1304
+ }}
1305
+
1306
  @media (max-width: 768px) {{
1307
  .report-container {{
1308
  padding: 10px;
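Note (illustrative, not part of the commit): the new _markdown_to_html() helper is what now feeds the AI-recommendations block. A rough sketch of the input/output it is meant to handle, assuming the class is importable from the repo-root report_generator module; exact <br> placement depends on the regex cleanup order, so the output comment shows the general shape only.

# Illustrative sketch only: exercising the _markdown_to_html() helper added above.
from report_generator import ReportGenerator  # assumed repo-root import path

generator = ReportGenerator()
markdown = (
    "## Priority Fixes\n"
    "- Compress hero images to improve **LCP**\n"
    "- Add missing meta descriptions\n"
)

html = generator._markdown_to_html(markdown)
print(html)
# Roughly: <h2>Priority Fixes</h2><ul><li>Compress hero images to improve
# <strong>LCP</strong></li><br><li>Add missing meta descriptions</li></ul>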
simple_pdf_generator.py CHANGED
@@ -17,13 +17,10 @@ class SimplePDFGenerator:
         self.available = False

     def generate_pdf(self, html_content: str) -> bytes:
-        """
-        Generate PDF from HTML content with better formatting
-        """
         if not self.available:
             raise ImportError("PDF generation requires reportlab: pip install reportlab")

-        # Import reportlab components
+
         from reportlab.pdfgen import canvas
         from reportlab.lib.pagesizes import letter, A4
         from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
@@ -36,10 +33,10 @@ class SimplePDFGenerator:
         # Parse HTML and extract content
         soup = BeautifulSoup(html_content, 'html.parser')

-        # Create PDF buffer
+
         buffer = io.BytesIO()

-        # Create PDF document with margins
+
         doc = SimpleDocTemplate(
             buffer,
             pagesize=A4,
@@ -49,17 +46,17 @@ class SimplePDFGenerator:
             rightMargin=0.75*inch
         )

-        # Define custom styles
+
         styles = getSampleStyleSheet()

-        # Custom styles
+
         title_style = ParagraphStyle(
             'CustomTitle',
             parent=styles['Heading1'],
             fontSize=24,
             textColor=black,
             spaceAfter=20,
-            alignment=1  # Center
+            alignment=1
         )

         header_style = ParagraphStyle(
@@ -82,7 +79,7 @@ class SimplePDFGenerator:

         story = []

-        # Add report title
+
         title = "SEO Analysis Report"
         url_elem = soup.find(string=re.compile(r'https?://'))
         if url_elem:
@@ -93,13 +90,13 @@ class SimplePDFGenerator:
         story.append(Paragraph(title, title_style))
         story.append(Spacer(1, 20))

-        # Extract and format content systematically
+
         self._extract_executive_summary(soup, story, header_style, styles['Normal'])
         self._extract_technical_seo(soup, story, header_style, subheader_style, styles['Normal'])
         self._extract_content_audit(soup, story, header_style, subheader_style, styles['Normal'])
         self._extract_recommendations(soup, story, header_style, styles['Normal'])

-        # Build PDF
+
         doc.build(story)

         # Get PDF data
@@ -107,12 +104,11 @@ class SimplePDFGenerator:
         return buffer.getvalue()

     def _extract_executive_summary(self, soup, story, header_style, normal_style):
-        """Extract executive summary section"""
         exec_section = soup.find(string=re.compile(r'Executive Summary', re.I))
         if exec_section:
             story.append(Paragraph("Executive Summary", header_style))

-            # Look for health score
+
             health_text = soup.find(string=re.compile(r'Overall SEO Health', re.I))
             if health_text:
                 parent = health_text.find_parent()
@@ -122,14 +118,13 @@ class SimplePDFGenerator:
             story.append(Spacer(1, 10))

     def _extract_technical_seo(self, soup, story, header_style, subheader_style, normal_style):
-        """Extract technical SEO section"""
         tech_section = soup.find(string=re.compile(r'Technical SEO', re.I))
         if tech_section:
             story.append(Paragraph("Technical SEO Analysis", header_style))

-            # Look for performance scores
+
             perf_elements = soup.find_all(string=re.compile(r'Performance Score|Mobile|Desktop', re.I))
-            for elem in perf_elements[:3]:  # Limit results
+            for elem in perf_elements[:3]:
                 parent = elem.find_parent()
                 if parent:
                     text = parent.get_text().strip()
@@ -138,14 +133,13 @@ class SimplePDFGenerator:
             story.append(Spacer(1, 10))

     def _extract_content_audit(self, soup, story, header_style, subheader_style, normal_style):
-        """Extract content audit section"""
         content_section = soup.find(string=re.compile(r'Content Audit', re.I))
         if content_section:
             story.append(Paragraph("Content Audit", header_style))

-            # Look for content metrics
+
             content_elements = soup.find_all(string=re.compile(r'Pages Analyzed|Metadata|Word Count', re.I))
-            for elem in content_elements[:3]:  # Limit results
+            for elem in content_elements[:3]:
                 parent = elem.find_parent()
                 if parent:
                     text = parent.get_text().strip()
@@ -154,23 +148,19 @@ class SimplePDFGenerator:
             story.append(Spacer(1, 10))

     def _extract_recommendations(self, soup, story, header_style, normal_style):
-        """Extract recommendations section"""
         rec_section = soup.find(string=re.compile(r'Recommendation', re.I))
         if rec_section:
             story.append(Paragraph("Recommendations", header_style))

-            # Look for recommendation items
+
             rec_elements = soup.find_all('li')
-            for elem in rec_elements[:5]:  # Top 5 recommendations
+            for elem in rec_elements[:5]:
                 text = elem.get_text().strip()
                 if len(text) > 15:
                     story.append(Paragraph(f"• {text}", normal_style))
             story.append(Spacer(1, 10))

 def create_browser_pdf_instructions() -> str:
-    """
-    Return instructions for manual PDF creation using browser
-    """
     return """
     ## How to Create PDF from HTML Report:

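Note (illustrative, not part of the commit): a minimal sketch of how the trimmed generate_pdf() is typically called. The import path, sample HTML, and output filename are assumptions; the ImportError branch mirrors the reportlab guard kept in the method.

# Illustrative sketch only: calling the PDF fallback on a rendered HTML report.
from simple_pdf_generator import SimplePDFGenerator  # assumed repo-root import path

pdf_gen = SimplePDFGenerator()
html_report = (
    "<html><body>"
    "<h2>Executive Summary</h2><p>Overall SEO Health: 72/100</p>"
    "<h2>Recommendations</h2><ul><li>Fix duplicate title tags on key pages</li></ul>"
    "</body></html>"
)

try:
    pdf_bytes = pdf_gen.generate_pdf(html_report)  # raises ImportError when reportlab is absent
    with open('seo_report.pdf', 'wb') as fh:
        fh.write(pdf_bytes)
except ImportError as exc:
    print(f"PDF export unavailable: {exc}")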