.gitattributes CHANGED
@@ -36,3 +36,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  assets/Montserrat-Bold.ttf filter=lfs diff=lfs merge=lfs -text
37
  assets/Montserrat-Regular.ttf filter=lfs diff=lfs merge=lfs -text
38
  assets/template_natura_empty.jpg filter=lfs diff=lfs merge=lfs -text
 
 
 
36
  assets/Montserrat-Bold.ttf filter=lfs diff=lfs merge=lfs -text
37
  assets/Montserrat-Regular.ttf filter=lfs diff=lfs merge=lfs -text
38
  assets/template_natura_empty.jpg filter=lfs diff=lfs merge=lfs -text
39
+ assets/template_1.png filter=lfs diff=lfs merge=lfs -text
40
+ assets/template_2.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -19,7 +19,7 @@ The system uses a Gradio interface (`app.py`) with three main tabs:
19
  * **Expert Perfume Analyst and Web Data Extractor:** This agent extracts detailed perfume information (notes, accords, longevity, sillage, similar fragrances, reviews) from the Fragrantica page.
20
  * **Fragrance Expert Woman and Perfume Analysis Reporter:** This agent synthesizes the extracted data into a human-friendly report, including graded evaluations and personalized recommendations.
21
 
22
- 3. **Image Ad Generator:** This tab allows users to generate a promotional image for a product. It takes the product name, original price, final price, a coupon code, and a product image URL as input. The tool then generates a promotional image with this information, based on a template.
23
 
24
  ## Merchant Support
25
 
@@ -62,5 +62,7 @@ The application now supports generating ad copy for both **Natura** and **Mercad
62
  - [x] Add support for any model/api key supported by LiteLLM.
63
  - [x] Add Fragrantica support, where user will input a Fragrantica URL and the agent will extract and generate a Perfume Analysis report.
64
  - [x] Support Mercado Livre Merchant
65
- - [wip] Add image templates
 
 
66
  - [ ] Create carousel images for Fragrantica post
 
19
  * **Expert Perfume Analyst and Web Data Extractor:** This agent extracts detailed perfume information (notes, accords, longevity, sillage, similar fragrances, reviews) from the Fragrantica page.
20
  * **Fragrance Expert Woman and Perfume Analysis Reporter:** This agent synthesizes the extracted data into a human-friendly report, including graded evaluations and personalized recommendations.
21
 
22
+ 3. **Image Ad Generator:** This tab allows users to generate a promotional image for a product. It takes the product name, original price, final price, a coupon code, and a product image URL as input. The tool then generates a promotional image with this information, based on a template. The generated image is saved as a temporary file and displayed in the interface.
23
 
24
  ## Merchant Support
25
 
 
62
  - [x] Add support for any model/api key supported by LiteLLM.
63
  - [x] Add Fragrantica support, where user will input a Fragrantica URL and the agent will extract and generate a Perfume Analysis report.
64
  - [x] Support Mercado Livre Merchant
65
+ - [x] Add image templates
66
+ - [wip] Add a share button to images that works with Instagram on mobile (Android/iPhone).
67
+ - [ ] Make the image template work seamlessly with data from the generated post.
68
  - [ ] Create carousel images for Fragrantica post
app.py CHANGED
@@ -1,6 +1,9 @@
1
  import gradio as gr
2
  import os
3
  import requests
 
 
 
4
  from crewai import Agent, Task, Crew, Process, LLM
5
  from dotenv import load_dotenv
6
  from stealth_scrape_tool import StealthScrapeTool
@@ -140,6 +143,42 @@ def clean_env_vars():
140
  os.environ.pop("OPENAI_BASE_URL", None)
141
  os.environ.pop("OPENAI_MODEL_NAME", None)
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  # --- Gradio Interface ---
144
  def generate_ad(product_url: str, store_name: str, main_cupom: str, main_cupom_discount_percentage: float, cupom_1: str, original_price: float, discounted_price: float, openai_api_key: str, natura_api_token: str, openai_base_url: str, openai_model_name: str):
145
  yield gr.update(interactive=False, value="Generating..."), gr.Markdown(value="⏳ Generating ad... Please wait.")
@@ -175,7 +214,7 @@ with gr.Blocks() as demo:
175
  original_price_input = gr.Number(label="Original Price (Optional)", value=0, minimum=0)
176
  discounted_price_input = gr.Number(label="Discounted Price (Optional)", value=0, minimum=0)
177
  with gr.Row():
178
- generate_button = gr.Button("Generate Ad")
179
  clear_button = gr.Button("Clear")
180
  ad_output = gr.Markdown(label="Your Generated Ad", show_copy_button=True)
181
 
@@ -185,6 +224,25 @@ with gr.Blocks() as demo:
185
  analyze_fragrantica_button = gr.Button("Analyze Fragrantica Product")
186
  fragrantica_output = gr.Markdown(label="Fragrantica Analysis Report")
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  with gr.Tab("Settings"):
189
  gr.Markdown("### ⚙️ API Key Settings")
190
  gr.Markdown("Enter your API keys below. These will be used for the current session.")
@@ -200,21 +258,57 @@ with gr.Blocks() as demo:
200
  def clear_fields():
201
  return "", 0, 0
202
 
203
- generate_button.click(generate_ad, inputs=[url_input, store_name_input, main_cupom_input, main_cupom_discount_percentage_input, cupom_1_input, original_price_input, discounted_price_input, openai_key_input, natura_token_input, openai_base_url_input, openai_model_name_input], outputs=[generate_button, ad_output])
204
- clear_button.click(clear_fields, inputs=[], outputs=[url_input, original_price_input, discounted_price_input])
205
-
206
- # Placeholder for Fragrantica analysis function
207
  def analyze_fragrantica_url(url, openai_api_key, natura_api_token, openai_base_url, openai_model_name):
 
208
  if not openai_api_key or not openai_model_name or not openai_base_url:
209
- return "Please configure your API keys in the settings section below."
 
210
  from fragrantica_crew import FragranticaCrew
211
  fragrantica_crew = FragranticaCrew(openai_api_key, openai_base_url, openai_model_name)
212
  report = fragrantica_crew.kickoff(url=url)
213
  if report == "SCRAPING_FAILED":
214
- return "❌ Scraping failed. The website could not be accessed or parsed. Please check the URL or try again later."
215
- return report.raw
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
 
 
217
  analyze_fragrantica_button.click(analyze_fragrantica_url, inputs=[fragrantica_url_input, openai_key_input, natura_token_input, openai_base_url_input, openai_model_name_input], outputs=fragrantica_output)
 
 
 
 
 
218
 
219
  if __name__ == "__main__":
220
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
  import os
3
  import requests
4
+ import base64
5
+ from io import BytesIO
6
+ from PIL import Image
7
  from crewai import Agent, Task, Crew, Process, LLM
8
  from dotenv import load_dotenv
9
  from stealth_scrape_tool import StealthScrapeTool
 
143
  os.environ.pop("OPENAI_BASE_URL", None)
144
  os.environ.pop("OPENAI_MODEL_NAME", None)
145
 
146
+ js_share_logic = """
147
+ async (image_data_url) => {
148
+ if (!image_data_url) {
149
+ console.error("Share button clicked, but no image data URL found.");
150
+ alert("No image to share. Please generate an image first.");
151
+ return;
152
+ }
153
+
154
+ const dataUrl = image_data_url;
155
+
156
+ try {
157
+ const response = await fetch(dataUrl);
158
+ const blob = await response.blob();
159
+ const file = new File([blob], "shared_image.png", { type: "image/png" });
160
+
161
+ if (navigator.share && navigator.canShare({ files: [file] })) {
162
+ await navigator.share({
163
+ files: [file],
164
+ title: "Check out this image!",
165
+ text: "Created with a cool Gradio app.",
166
+ });
167
+ } else {
168
+ alert("Sharing not supported on this browser.");
169
+ }
170
+ } catch (error) {
171
+ if (error.name === 'AbortError') {
172
+ console.log("Share dialog cancelled by user.");
173
+ // Do nothing, as this is a user-initiated cancellation
174
+ } else {
175
+ console.error("Error sharing:", error);
176
+ alert("An error occurred while trying to share.");
177
+ }
178
+ }
179
+ }
180
+ """
181
+
182
  # --- Gradio Interface ---
183
  def generate_ad(product_url: str, store_name: str, main_cupom: str, main_cupom_discount_percentage: float, cupom_1: str, original_price: float, discounted_price: float, openai_api_key: str, natura_api_token: str, openai_base_url: str, openai_model_name: str):
184
  yield gr.update(interactive=False, value="Generating..."), gr.Markdown(value="⏳ Generating ad... Please wait.")
 
214
  original_price_input = gr.Number(label="Original Price (Optional)", value=0, minimum=0)
215
  discounted_price_input = gr.Number(label="Discounted Price (Optional)", value=0, minimum=0)
216
  with gr.Row():
217
+ generate_ad_button = gr.Button("Generate Ad")
218
  clear_button = gr.Button("Clear")
219
  ad_output = gr.Markdown(label="Your Generated Ad", show_copy_button=True)
220
 
 
224
  analyze_fragrantica_button = gr.Button("Analyze Fragrantica Product")
225
  fragrantica_output = gr.Markdown(label="Fragrantica Analysis Report")
226
 
227
+ with gr.Tab("Images"):
228
+ gr.Markdown("### 🖼️ Generate Promotional Image")
229
+ with gr.Row():
230
+ with gr.Column():
231
+ image_product_url_input = gr.Textbox(label="Product Image URL", placeholder="Enter product image URL...")
232
+ image_product_name_input = gr.Textbox(label="Product Name", placeholder="Enter product name...")
233
+ image_original_price_input = gr.Number(label="Original Price", placeholder="Enter original price...")
234
+ image_final_price_input = gr.Number(label="Final Price", placeholder="Enter final price...")
235
+ image_coupon_code_input = gr.Textbox(label="Coupon Code", placeholder="Enter coupon code...")
236
+ gen_image_btn = gr.Button("Generate Image")
237
+ with gr.Column():
238
+ image_output = gr.Image(label="Generated Image", height=500, type="pil", interactive=False)
239
+ image_data_url_storage = gr.Textbox(
240
+ label="Image Data URL (for debugging)",
241
+ visible=True,
242
+ elem_id="image_data_url_storage"
243
+ )
244
+ share_button = gr.Button("🚀 Share Image", interactive=False)
245
+
246
  with gr.Tab("Settings"):
247
  gr.Markdown("### ⚙️ API Key Settings")
248
  gr.Markdown("Enter your API keys below. These will be used for the current session.")
 
258
  def clear_fields():
259
  return "", 0, 0
260
 
 
 
 
 
261
  def analyze_fragrantica_url(url, openai_api_key, natura_api_token, openai_base_url, openai_model_name):
262
+ yield "⏳ Analyzing Fragrantica product... Please wait." # Loading message
263
  if not openai_api_key or not openai_model_name or not openai_base_url:
264
+ yield "Please configure your API keys in the settings section below."
265
+ return
266
  from fragrantica_crew import FragranticaCrew
267
  fragrantica_crew = FragranticaCrew(openai_api_key, openai_base_url, openai_model_name)
268
  report = fragrantica_crew.kickoff(url=url)
269
  if report == "SCRAPING_FAILED":
270
+ yield "❌ Scraping failed. The website could not be accessed or parsed. Please check the URL or try again later."
271
+ return
272
+ yield report.raw
273
+
274
+ def image_to_base64(pil_image):
275
+ print(f"image_to_base64 called. pil_image type: {type(pil_image)}")
276
+ if pil_image is None:
277
+ print("pil_image is None. Returning empty string and interactive=False.")
278
+ return "", gr.update(interactive=False)
279
+ buffered = BytesIO()
280
+ pil_image.save(buffered, format="PNG")
281
+ img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
282
+ data_url = f"data:image/png;base64,{img_str}"
283
+ print("Returning data_url and interactive=True.")
284
+ return data_url, gr.update(interactive=True)
285
+
286
+ def generate_image(product_image_url, product_name, original_price, final_price, coupon_code):
287
+ tool = GenerateImageTool()
288
+ original_price_str = f"{original_price:.2f}".replace('.', ',')
289
+ final_price_str = f"{final_price:.2f}".replace('.', ',')
290
+
291
+ yield gr.update(interactive=False, value="Generating..."), None
292
+
293
+ image_path = tool._run(
294
+ product_image_url=product_image_url,
295
+ product_name=product_name,
296
+ original_price=original_price_str,
297
+ final_price=final_price_str,
298
+ coupon_code=coupon_code
299
+ )
300
+
301
+ pil_image = Image.open(image_path)
302
+ yield gr.update(interactive=True, value="Generate Image"), pil_image
303
 
304
+ generate_ad_button.click(generate_ad, inputs=[url_input, store_name_input, main_cupom_input, main_cupom_discount_percentage_input, cupom_1_input, original_price_input, discounted_price_input, openai_key_input, natura_token_input, openai_base_url_input, openai_model_name_input], outputs=[generate_ad_button, ad_output])
305
+ clear_button.click(clear_fields, inputs=[], outputs=[url_input, original_price_input, discounted_price_input])
306
  analyze_fragrantica_button.click(analyze_fragrantica_url, inputs=[fragrantica_url_input, openai_key_input, natura_token_input, openai_base_url_input, openai_model_name_input], outputs=fragrantica_output)
307
+ gen_image_btn.click(generate_image,
308
+ inputs=[image_product_url_input, image_product_name_input, image_original_price_input, image_final_price_input, image_coupon_code_input],
309
+ outputs=[gen_image_btn, image_output])
310
+ image_output.change(image_to_base64, inputs=image_output, outputs=[image_data_url_storage, share_button])
311
+ share_button.click(fn=None, inputs=[image_data_url_storage], js=js_share_logic)
312
 
313
  if __name__ == "__main__":
314
+ demo.launch(server_name="0.0.0.0", server_port=7860)
assets/template_1.png ADDED

Git LFS Details

  • SHA256: 231de1f34891d0aa33265fa9327c53fdbac59590ce896a231a7520870531e311
  • Pointer size: 131 Bytes
  • Size of remote file: 721 kB
assets/template_2.png ADDED

Git LFS Details

  • SHA256: d24c250f4d9b969b0a52b966d792efa7261b09da87be9b5f08af881e17a3d8b3
  • Pointer size: 132 Bytes
  • Size of remote file: 1.43 MB
fragrantica_crew.py CHANGED
@@ -12,7 +12,8 @@ class FragranticaCrew:
12
  llm = LLM(
13
  api_key=self.openai_api_key,
14
  model=self.openai_model_name,
15
- base_url=self.openai_base_url
 
16
  )
17
 
18
  self.research_agent = Agent(
@@ -39,7 +40,7 @@ class FragranticaCrew:
39
  def kickoff(self, url: str) -> str:
40
  research_task = Task(
41
  description=(
42
- f"""1. Scrape the content of the URL: {url} using the 'Stealth Web Scraper' tool with `website_url` as {url} and `css_element` as "#main-content". If the scraping tool fails or returns empty content ONCE, try the `css_element` as "body", If they also fail when you pass `css_element` as "body", then you MUST return the exact string "SCRAPING_FAILED".
43
 
44
  2. If scraping is successful, carefully analyze the entire page content to extract the following information:
45
 
@@ -54,7 +55,7 @@ class FragranticaCrew:
54
 
55
  - Este Perfume me Lembra do: Find the section titled "Este perfume me lembra do", and list the perfume names mentioned there.
56
 
57
- - Opinião dos usuários: Look for a section containing detailed user reviews, such as "Todas as Resenhas por Data" or similar, and synthesize a detailed summary from these reviews.
58
 
59
  3. Present the extracted information in a clear, structured format, ready for reporting. If any specific piece of information cannot be found, check again to make sure they are not found, after check again, if you truly do not find the info, state 'N/A' for that field. If the entire scraping process fails, return "SCRAPING_FAILED".
60
  """
 
12
  llm = LLM(
13
  api_key=self.openai_api_key,
14
  model=self.openai_model_name,
15
+ base_url=self.openai_base_url,
16
+ max_tokens=32768
17
  )
18
 
19
  self.research_agent = Agent(
 
40
  def kickoff(self, url: str) -> str:
41
  research_task = Task(
42
  description=(
43
+ f"""1. Scrape the content of the URL: {url} using the 'Stealth Web Scraper' tool with `website_url` as {url}, `css_element` as "#main-content", and `wait_for_selectors` as "['#all-reviews']". If the scraping tool fails or returns empty content ONCE, try the `css_element` as "body", If they also fail when you pass `css_element` as "body", then you MUST return the exact string "SCRAPING_FAILED".
44
 
45
  2. If scraping is successful, carefully analyze the entire page content to extract the following information:
46
 
 
55
 
56
  - Este Perfume me Lembra do: Find the section titled "Este perfume me lembra do", and list the perfume names mentioned there.
57
 
58
+ - Opinião dos usuários: Locate the HTML div with ID "all-reviews" within the scraped content. From this div, extract the *full text content* of all individual user reviews. Then, synthesize a detailed summary from these extracted texts, focusing on common themes, sentiments, and key observations from the users.
59
 
60
  3. Present the extracted information in a clear, structured format, ready for reporting. If any specific piece of information cannot be found, check again to make sure they are not found, after check again, if you truly do not find the info, state 'N/A' for that field. If the entire scraping process fails, return "SCRAPING_FAILED".
61
  """
image_generator_tool.py CHANGED
@@ -3,7 +3,7 @@ from pydantic import BaseModel, Field
3
  from PIL import Image, ImageDraw, ImageFont
4
  import requests
5
  from io import BytesIO
6
- import uuid
7
 
8
  class GenerateImageToolInput(BaseModel):
9
  """Input for the Generate Image Tool."""
@@ -14,18 +14,14 @@ class GenerateImageToolInput(BaseModel):
14
  coupon_code: str = Field(..., description="Coupon code to be displayed on the image.")
15
 
16
  import tempfile
17
- import os
18
 
19
  class GenerateImageTool(BaseTool):
20
  name: str = "Generate Image Tool"
21
- description: str = "Generates a promotional image for a product using a template."
22
  args_schema = GenerateImageToolInput
23
 
24
  def _run(self, product_image_url: str, product_name: str, original_price: str, final_price: str, coupon_code: str) -> str:
25
-
26
- template_path = 'assets/template_natura_empty.jpg'
27
- temp_dir = tempfile.gettempdir()
28
- output_path = os.path.join(temp_dir, f'{uuid.uuid4()}.png')
29
 
30
  try:
31
  template_image = Image.open(template_path).convert("RGBA")
@@ -64,15 +60,15 @@ class GenerateImageTool(BaseTool):
64
  black_color = "#000000"
65
 
66
  draw.text((360, 710), product_name, font=font_name, fill=white_color, anchor="ms")
67
- draw.text((360, 800), original_price, font=font_price_from, fill=white_color, anchor="ms")
68
- draw.text((360, 860), final_price, font=font_price, fill=yellow_color, anchor="ms")
69
  draw.text((360, 993), coupon_code, font=font_cupom, fill=black_color, anchor="ms")
70
 
71
- template_image.save(output_path)
72
-
73
- return output_path
74
 
75
  except FileNotFoundError:
76
  return f"Error: The template file '{template_path}' was not found."
77
  except Exception as e:
78
- return f"An error occurred: {e}"
 
3
  from PIL import Image, ImageDraw, ImageFont
4
  import requests
5
  from io import BytesIO
6
+ import base64
7
 
8
  class GenerateImageToolInput(BaseModel):
9
  """Input for the Generate Image Tool."""
 
14
  coupon_code: str = Field(..., description="Coupon code to be displayed on the image.")
15
 
16
  import tempfile
 
17
 
18
  class GenerateImageTool(BaseTool):
19
  name: str = "Generate Image Tool"
20
+ description: str = "Generates a promotional image for a product using a template and returns the file path."
21
  args_schema = GenerateImageToolInput
22
 
23
  def _run(self, product_image_url: str, product_name: str, original_price: str, final_price: str, coupon_code: str) -> str:
24
+ template_path = 'assets/template_1.png'
 
 
 
25
 
26
  try:
27
  template_image = Image.open(template_path).convert("RGBA")
 
60
  black_color = "#000000"
61
 
62
  draw.text((360, 710), product_name, font=font_name, fill=white_color, anchor="ms")
63
+ draw.text((360, 800), f"De: R$ {original_price}", font=font_price_from, fill=white_color, anchor="ms")
64
+ draw.text((360, 860), f"Por: R$ {final_price}", font=font_price, fill=yellow_color, anchor="ms")
65
  draw.text((360, 993), coupon_code, font=font_cupom, fill=black_color, anchor="ms")
66
 
67
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
68
+ template_image.save(temp_file.name)
69
+ return temp_file.name
70
 
71
  except FileNotFoundError:
72
  return f"Error: The template file '{template_path}' was not found."
73
  except Exception as e:
74
+ return f"An error occurred: {e[:1000]}"
stealth_scrape_tool.py CHANGED
@@ -8,38 +8,99 @@ class StealthScrapeTool(BaseTool):
8
  name: str = "Stealth Web Scraper"
9
  description: str = "A tool for stealthily scraping content from a given URL using Playwright and a CSS selector."
10
 
11
- async def _arun(self, website_url: str, css_element = "body") -> str:
12
  try:
13
  async with Stealth().use_async(async_playwright()) as p:
14
  browser = await p.chromium.launch(headless=True)
15
  page = await browser.new_page()
16
 
 
 
17
  await page.goto(website_url, timeout=120000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- try:
20
- # Wait for the specific element to be present
21
- await page.wait_for_selector(css_element, timeout=30000)
22
- except Exception as e:
23
- # If timeout error, try again with "body" as css_element
24
- if "Timeout" in str(e) and css_element != "body":
25
- await page.wait_for_selector("body", timeout=60000)
26
- css_element = "body"
27
- else:
28
- raise e
29
 
 
30
  html_content = await page.content()
31
  soup = BeautifulSoup(html_content, 'html.parser')
32
-
 
 
 
 
 
 
33
  target_element = soup.select_one(css_element)
34
  if target_element:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  return target_element.prettify()
36
  else:
37
  return f"Error: Could not find element with selector '{css_element}' on the page."
38
  except Exception as e:
39
  return f"Error during stealth web scraping: {e}"
40
 
41
- def _run(self, website_url: str, css_element: str) -> str:
42
  # This method is for synchronous execution, which is not ideal for Playwright.
43
  # CrewAI typically calls _arun for async tools.
44
  # For simplicity, we'll just call the async version here.
45
- return asyncio.run(self._arun(website_url, css_element))
 
8
  name: str = "Stealth Web Scraper"
9
  description: str = "A tool for stealthily scraping content from a given URL using Playwright and a CSS selector."
10
 
11
+ async def _arun(self, website_url: str, css_element = "body", wait_for_selectors: list[str] = None) -> str:
12
  try:
13
  async with Stealth().use_async(async_playwright()) as p:
14
  browser = await p.chromium.launch(headless=True)
15
  page = await browser.new_page()
16
 
17
+ print(f"StealthScrapeTool: Starting scraping for {website_url}...")
18
+ print(f"StealthScrapeTool: Navigating to {website_url}")
19
  await page.goto(website_url, timeout=120000)
20
+ await asyncio.sleep(5)
21
+
22
+ # Scroll to the bottom of the page repeatedly to load all dynamic content
23
+ print("StealthScrapeTool: Scrolling through the page to load dynamic content...")
24
+ print("StealthScrapeTool: Getting initial scrollHeight...")
25
+ last_height = await page.evaluate("document.body.scrollHeight")
26
+ print(f"StealthScrapeTool: Initial scrollHeight: {last_height}")
27
+ scroll_attempts = 0
28
+ max_scroll_attempts = 10
29
+
30
+ while scroll_attempts < max_scroll_attempts:
31
+ print(f"StealthScrapeTool: Scroll attempt {scroll_attempts + 1}")
32
+ print("StealthScrapeTool: Scrolling to bottom...")
33
+ await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
34
+ print("StealthScrapeTool: Scrolled. Waiting for content to load...")
35
+
36
+ await asyncio.sleep(5)
37
+
38
+ print("StealthScrapeTool: Getting new scrollHeight...")
39
+ new_height = await page.evaluate("document.body.scrollHeight")
40
+ print(f"StealthScrapeTool: New scrollHeight: {new_height}")
41
+ if new_height == last_height:
42
+ print("StealthScrapeTool: ScrollHeight unchanged. Breaking scroll loop.")
43
+ break
44
+ last_height = new_height
45
+ scroll_attempts += 1
46
+ print("StealthScrapeTool: Finished scrolling.")
47
+
48
+ print(f"StealthScrapeTool: Page loaded. Attempting to find element with selector '{css_element}'")
49
+
50
+ # Element waiting logic
51
+ selectors_to_wait_for = []
52
+ if wait_for_selectors:
53
+ print("StealthScrapeTool: Additional selectors to wait for provided.")
54
+ selectors_to_wait_for.extend(wait_for_selectors)
55
+
56
+ # Always include css_element in the list of selectors to wait for
57
+ selectors_to_wait_for.append(css_element)
58
+
59
+ combined_selector = ", ".join(selectors_to_wait_for)
60
+ print(f"StealthScrapeTool: Waiting for selectors: {combined_selector}")
61
+ await page.wait_for_selector(combined_selector, timeout=60000, state='attached')
62
 
 
 
 
 
 
 
 
 
 
 
63
 
64
+ print("StealthScrapeTool: Required elements found. Extracting content...")
65
  html_content = await page.content()
66
  soup = BeautifulSoup(html_content, 'html.parser')
67
+
68
+ # Debug print to confirm if waited-for elements are in the scraped content
69
+ if soup.select_one("#all-reviews"):
70
+ print("StealthScrapeTool: #all-reviews found in scraped content.")
71
+ else:
72
+ print("StealthScrapeTool: #all-reviews NOT found in scraped content.")
73
+
74
  target_element = soup.select_one(css_element)
75
  if target_element:
76
+ # Clean the HTML content
77
+ print(f"Successfully found element with selector '{css_element}'. Cleaning content...")
78
+ for script in target_element.find_all("script"):
79
+ script.decompose()
80
+ for style_tag in target_element.find_all("style"):
81
+ style_tag.decompose()
82
+ for img in target_element.find_all("img"):
83
+ img.decompose()
84
+ for svg in target_element.find_all("svg"):
85
+ svg.decompose()
86
+ for iframe in target_element.find_all("iframe"):
87
+ iframe.decompose()
88
+ for source_tag in target_element.find_all("source"):
89
+ source_tag.decompose()
90
+
91
+ # Remove style attributes from all tags
92
+ for tag in target_element.find_all(True):
93
+ if 'style' in tag.attrs:
94
+ del tag['style']
95
+
96
  return target_element.prettify()
97
  else:
98
  return f"Error: Could not find element with selector '{css_element}' on the page."
99
  except Exception as e:
100
  return f"Error during stealth web scraping: {e}"
101
 
102
+ def _run(self, website_url: str, css_element: str, wait_for_selectors: list[str] = None) -> str:
103
  # This method is for synchronous execution, which is not ideal for Playwright.
104
  # CrewAI typically calls _arun for async tools.
105
  # For simplicity, we'll just call the async version here.
106
+ return asyncio.run(self._arun(website_url, css_element, wait_for_selectors))