Spaces:

Eladlev
/

test_computer_use

Sleeping

App Files Community

Eladlev committed on Nov 25, 2024

Commit

4311882

verified ·

1 Parent(s): 6babbf8

Upload app.py

Browse files

Files changed (1) hide show

app.py +151 -249

app.py CHANGED Viewed

@@ -1,262 +1,164 @@
-"""
-Entrypoint for Gradio, see https://gradio.app/
-"""
-import platform
-import asyncio
-import base64
-import os
-from datetime import datetime
-from enum import StrEnum
-from functools import partial
-from pathlib import Path
-from typing import cast, Dict
 import gradio as gr
-from anthropic import APIResponse
 from anthropic.types import TextBlock
 from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock
-from anthropic.types.tool_use_block import ToolUseBlock
-from computer_use_demo.loop import (
-    PROVIDER_TO_DEFAULT_MODEL_NAME,
-    APIProvider,
-    sampling_loop,
-    sampling_loop_sync,
-)
-from computer_use_demo.tools import ToolResult
-CONFIG_DIR = Path("~/.anthropic").expanduser()
-API_KEY_FILE = CONFIG_DIR / "api_key"
-WARNING_TEXT = "⚠️ Security Alert: Never provide access to sensitive accounts or data, as malicious web content can hijack Claude's behavior"
-class Sender(StrEnum):
-    USER = "user"
-    BOT = "assistant"
-    TOOL = "tool"
-def setup_state(state):
-    if "messages" not in state:
-        state["messages"] = []
-    if "api_key" not in state:
-        # Try to load API key from file first, then environment
-        state["api_key"] = load_from_storage("api_key") or os.getenv("ANTHROPIC_API_KEY", "")
-        if not state["api_key"]:
-            print("API key not found. Please set it in the environment or storage.")
-    if "provider" not in state:
-        state["provider"] = os.getenv("API_PROVIDER", "anthropic") or APIProvider.ANTHROPIC
-    if "provider_radio" not in state:
-        state["provider_radio"] = state["provider"]
-    if "model" not in state:
-        _reset_model(state)
-    if "auth_validated" not in state:
-        state["auth_validated"] = False
-    if "responses" not in state:
-        state["responses"] = {}
-    if "tools" not in state:
-        state["tools"] = {}
-    if "only_n_most_recent_images" not in state:
-        state["only_n_most_recent_images"] = 3 # 10
-    if "custom_system_prompt" not in state:
-        state["custom_system_prompt"] = load_from_storage("system_prompt") or ""
-        # remove if want to use default system prompt
-        device_os_name = "Windows" if platform.platform == "Windows" else "Mac" if platform.platform == "Darwin" else "Linux"
-        state["custom_system_prompt"] += f"\n\nNOTE: you are operating a {device_os_name} machine"
-    if "hide_images" not in state:
-        state["hide_images"] = False
-def _reset_model(state):
-    state["model"] = PROVIDER_TO_DEFAULT_MODEL_NAME[cast(APIProvider, state["provider"])]
-async def main(state):
-    """Render loop for Gradio"""
-    setup_state(state)
-    return "Setup completed"
-def validate_auth(provider: APIProvider, api_key: str | None):
-    if provider == APIProvider.ANTHROPIC:
-        if not api_key:
-            return "Enter your Anthropic API key to continue."
-    if provider == APIProvider.BEDROCK:
-        import boto3
-        if not boto3.Session().get_credentials():
-            return "You must have AWS credentials set up to use the Bedrock API."
-    if provider == APIProvider.VERTEX:
-        import google.auth
-        from google.auth.exceptions import DefaultCredentialsError
-        if not os.environ.get("CLOUD_ML_REGION"):
-            return "Set the CLOUD_ML_REGION environment variable to use the Vertex API."
-        try:
-            google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
-        except DefaultCredentialsError:
-            return "Your google cloud credentials are not set up correctly."
-def load_from_storage(filename: str) -> str | None:
-    """Load data from a file in the storage directory."""
-    try:
-        file_path = CONFIG_DIR / filename
-        if file_path.exists():
-            data = file_path.read_text().strip()
-            if data:
-                return data
-    except Exception as e:
-        print(f"Debug: Error loading {filename}: {e}")
-    return None
-def save_to_storage(filename: str, data: str) -> None:
-    """Save data to a file in the storage directory."""
-    try:
-        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
-        file_path = CONFIG_DIR / filename
-        file_path.write_text(data)
-        # Ensure only user can read/write the file
-        file_path.chmod(0o600)
-    except Exception as e:
-        print(f"Debug: Error saving {filename}: {e}")
-def _api_response_callback(response: APIResponse[BetaMessage], response_state: dict):
-    response_id = datetime.now().isoformat()
-    response_state[response_id] = response
-def _tool_output_callback(tool_output: ToolResult, tool_id: str, tool_state: dict):
-    tool_state[tool_id] = tool_output
-def _render_message(sender: Sender, message: str | BetaTextBlock | BetaToolUseBlock | ToolResult, state):
-    is_tool_result = not isinstance(message, str) and (
-        isinstance(message, ToolResult)
-        or message.__class__.__name__ == "ToolResult"
-        or message.__class__.__name__ == "CLIResult"
-    )
-    if not message or (
-        is_tool_result
-        and state["hide_images"]
-        and not hasattr(message, "error")
-        and not hasattr(message, "output")
-    ):
-        return
-    if is_tool_result:
-        message = cast(ToolResult, message)
-        if message.output:
-            return message.output
-        if message.error:
-            return f"Error: {message.error}"
-        if message.base64_image and not state["hide_images"]:
-            return base64.b64decode(message.base64_image)
-    elif isinstance(message, BetaTextBlock) or isinstance(message, TextBlock):
-        return message.text
-    elif isinstance(message, BetaToolUseBlock) or isinstance(message, ToolUseBlock):
-        return f"Tool Use: {message.name}\nInput: {message.input}"
-    else:
-        return message
-# open new tab, open google sheets inside, then create a new blank spreadsheet
-def process_input(user_input, state):
-    # Ensure the state is properly initialized
-    setup_state(state)
-    # Append the user input to the messages in the state
-    state["messages"].append(
-        {
-            "role": Sender.USER,
-            "content": [TextBlock(type="text", text=user_input)],
-        }
     )
-    # Run the sampling loop synchronously and yield messages
-    for message in sampling_loop(state):
-        yield message
-def accumulate_messages(*args, **kwargs):
-    """
-    Wrapper function to accumulate messages from sampling_loop_sync.
-    """
-    accumulated_messages = []
-    for message in sampling_loop_sync(*args, **kwargs):
-        # Check if the message is already in the accumulated messages
-        if message not in accumulated_messages:
-            accumulated_messages.append(message)
-            # Yield the accumulated messages as a list
-            yield accumulated_messages
-def sampling_loop(state):
-    # Ensure the API key is present
-    if not state.get("api_key"):
-        raise ValueError("API key is missing. Please set it in the environment or storage.")
-    # Call the sampling loop and yield messages
-    for message in accumulate_messages(
-        system_prompt_suffix=state["custom_system_prompt"],
-        model=state["model"],
-        provider=state["provider"],
-        messages=state["messages"],
-        output_callback=partial(_render_message, Sender.BOT, state=state),
-        tool_output_callback=partial(_tool_output_callback, tool_state=state["tools"]),
-        api_response_callback=partial(_api_response_callback, response_state=state["responses"]),
-        api_key=state["api_key"],
-        only_n_most_recent_images=state["only_n_most_recent_images"],
-    ):
-        yield message
-with gr.Blocks() as demo:
-    state = gr.State({})  # Use Gradio's state management
-    gr.Markdown("# Claude Computer Use Demo")
-    if not os.getenv("HIDE_WARNING", False):
-        gr.Markdown(WARNING_TEXT)
     with gr.Row():
-        provider = gr.Dropdown(
-            label="API Provider",
-            choices=[option.value for option in APIProvider],
-            value="anthropic",
-            interactive=True,
-        )
-        model = gr.Textbox(label="Model", value="claude-3-5-sonnet-20241022")
-        api_key = gr.Textbox(
-            label="Anthropic API Key",
-            type="password",
-            value="",
-            interactive=True,
-        )
-        only_n_images = gr.Slider(
-            label="Only send N most recent images",
-            minimum=0,
-            value=3, # 10
-            interactive=True,
-        )
-        custom_prompt = gr.Textbox(
-            label="Custom System Prompt Suffix",
-            value="",
-            interactive=True,
-        )
-        hide_images = gr.Checkbox(label="Hide screenshots", value=False)
-    api_key.change(fn=lambda key: save_to_storage(API_KEY_FILE, key), inputs=api_key)
-    chat_input = gr.Textbox(label="Type a message to send to Claude...")
-    # chat_output = gr.Textbox(label="Chat Output", interactive=False)
-    chatbot = gr.Chatbot(label="Chatbot History", autoscroll=True)
-    # Pass state as an input to the function
-    chat_input.submit(process_input, [chat_input, state], chatbot)
-demo.launch(share=True)

 import gradio as gr
+import io
+import os
+from PIL import Image, ImageDraw
+from anthropic import Anthropic
 from anthropic.types import TextBlock
 from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock
+max_tokens = 4096  # passed as `max_tokens` to the messages.create call in chatbot_response
+import base64
+model = 'claude-3-5-sonnet-20241022'  # model name sent with the request in chatbot_response
+system = """<SYSTEM_CAPABILITY>
+* You are utilizing a Windows system with internet access.
+* The current date is Monday, November 18, 2024.
+</SYSTEM_CAPABILITY>"""  # system prompt for the request; NOTE(review): the OS and date are hard-coded
+def save_image_or_get_url(image, filename="processed_image.png"):
+    """
+    Saves the image inside the local "static" directory and returns its path.
+    Parameters:
+        image (PIL.Image): The image to write (anything exposing save(path) works).
+        filename (str): File name to use inside "static".
+    Returns:
+        str: Path of the written file, i.e. "static" joined with filename.
+    """
+    # image.save() does not create missing parent directories, so make sure
+    # "static" exists instead of failing the first time the app runs.
+    os.makedirs("static", exist_ok=True)
+    filepath = os.path.join("static", filename)
+    image.save(filepath)
+    return filepath
+def draw_circle_on_image(image, center, radius=30, outline="red", width=15):
+    """
+    Draws a circle outline on the given image using a center point and radius.
+    The image is modified in place and the same object is returned.
+    Parameters:
+        image (PIL.Image): The image to draw on.
+        center (tuple | list): Two values (x, y) representing the center of the circle.
+        radius (int | float): The radius of the circle; must be positive.
+        outline (str): Outline colour passed to ImageDraw's ellipse().
+        width (int): Outline thickness passed to ImageDraw's ellipse().
+    Returns:
+        PIL.Image: The image with the circle drawn.
+    Raises:
+        ValueError: If center is not a pair of values or radius is not a positive number.
+    """
+    # Lists are accepted alongside tuples so a caller holding a list-shaped
+    # coordinate does not have to convert it first; strings stay rejected.
+    if not isinstance(center, (tuple, list)) or len(center) != 2:
+        raise ValueError("Center must be a tuple of two values (x, y).")
+    if not isinstance(radius, (int, float)) or radius <= 0:
+        raise ValueError("Radius must be a positive number.")
+    x, y = center
+    # Bounding box of the circle: top-left corner first, then bottom-right corner
+    bbox = [x - radius, y - radius, x + radius, y + radius]
+    draw = ImageDraw.Draw(image)
+    draw.ellipse(bbox, outline=outline, width=width)
+    return image
+def pil_image_to_base64(pil_image):
+    """
+    Encodes an image as the Base64 text of its PNG serialization.
+    Parameters:
+        pil_image (PIL.Image): The image to encode.
+    Returns:
+        str: Base64 string (decoded as UTF-8) of the PNG bytes.
+    """
+    with io.BytesIO() as png_buffer:
+        # Serialize to PNG entirely in memory; getvalue() returns the whole
+        # buffer regardless of the current stream position.
+        pil_image.save(png_buffer, format="PNG")
+        png_bytes = png_buffer.getvalue()
+    return base64.b64encode(png_bytes).decode("utf-8")
+# Ask the model about the uploaded screenshot and mark any coordinate it returns
+def chatbot_response(input_text, image, key, chat_history):
+    """
+    Sends the question and the uploaded screenshot to the Anthropic
+    computer-use beta and returns the chat history extended with the reply.
+    Parameters:
+        input_text (str): The user's question about the screenshot.
+        image (PIL.Image | None): The uploaded screenshot.
+        key (str): Anthropic API key.
+        chat_history (list): Existing [user, bot] rows; it is not modified.
+    Returns:
+        list: A new history list. Text blocks of the reply become text rows;
+        every tool input carrying a 'coordinate' adds a row with a copy of
+        the screenshot on which that point is circled.
+    """
+    if not key:
+        return chat_history + [[input_text, "Please enter a valid key."]]
+    if image is None:
+        return chat_history + [[input_text, "Please upload an image."]]
+    # Screen size declared to the `computer` tool. Returned coordinates are
+    # rescaled from this size to the uploaded image's own size further down.
+    display_width, display_height = 1512, 982
+    # Id linking the scripted screenshot request to the tool result answering it
+    tool_use_id = 'toolu_01PSTVtavFgmx6ctaiSvacCB'
+    client = Anthropic(api_key=key)
+    # Scripted transcript: the user's question, an assistant turn that asks
+    # for a screenshot, and a tool result that supplies the uploaded image.
+    messages = [
+        {'role': 'user', 'content': [TextBlock(text=f'Look at my screenshot, {input_text}', type='text')]},
+        {'role': 'assistant', 'content': [
+            BetaTextBlock(
+                text="I'll help you check your screen, but first I need to take a screenshot to see what you're looking at.",
+                type='text'),
+            BetaToolUseBlock(id=tool_use_id, input={'action': 'screenshot'}, name='computer', type='tool_use'),
+        ]},
+        {'role': 'user', 'content': [{
+            'type': 'tool_result', 'tool_use_id': tool_use_id, 'is_error': False,
+            'content': [{'type': 'image',
+                         'source': {'type': 'base64', 'media_type': 'image/png',
+                                    'data': pil_image_to_base64(image)}}],
+        }]},
+    ]
+    params = [{'name': 'computer', 'type': 'computer_20241022', 'display_width_px': display_width,
+               'display_height_px': display_height, 'display_number': None},
+              {'type': 'bash_20241022', 'name': 'bash'},
+              {'name': 'str_replace_editor', 'type': 'text_editor_20241022'}]
+    raw_response = client.beta.messages.with_raw_response.create(
+        max_tokens=max_tokens,
+        messages=messages,
+        model=model,
+        system=system,
+        tools=params,
+        betas=["computer-use-2024-10-22"],
+        temperature=0.0,
     )
+    response = raw_response.parse()
+    # True division on purpose: floor division turned the factor into 0 for
+    # images smaller than the declared display and truncated it otherwise,
+    # which put the circle in the wrong place.
+    scale_x = image.width / display_width
+    scale_y = image.height / display_height
+    # The user's text is attached to the first new row only, so a reply with
+    # several blocks does not repeat the question.
+    user_cell = input_text
+    for r in response.content:
+        if hasattr(r, 'text'):
+            chat_history = chat_history + [[user_cell, r.text]]
+            user_cell = None
+        if hasattr(r, 'input') and 'coordinate' in r.input:
+            coordinate = r.input['coordinate']
+            point = (round(coordinate[0] * scale_x), round(coordinate[1] * scale_y))
+            # Draw on a copy so the uploaded image is left untouched and
+            # circles from separate actions do not pile up on one picture.
+            marked = draw_circle_on_image(image.copy(), point)
+            # One file per tool call; a single shared file name would make
+            # every image row show whichever picture was written last.
+            image_url = save_image_or_get_url(marked, f"processed_{getattr(r, 'id', 'image')}.png")
+            # A one-element tuple is how a file path is passed to the chatbot
+            chat_history = chat_history + [[user_cell, (image_url,)]]
+            user_cell = None
+    return chat_history
+# Create the Gradio interface
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(label="Upload Image", type="pil", interactive=True)
+        with gr.Column():
+            chatbot = gr.Chatbot(label="Chatbot Interaction", height=400)
     with gr.Row():
+        user_input = gr.Textbox(label="Type your message here", placeholder="Enter your message...")
+        key_input = gr.Textbox(label="API Key", placeholder="Enter your key...", type="password")
+    # Button to submit
+    submit_button = gr.Button("Submit")
+    # Per-session conversation so far, stored as [user, bot] rows
+    chat_history = gr.State(value=[])
+    def _submit(input_text, image, key, history):
+        """Run one turn and return the new history for the chatbot and for the State."""
+        updated = chatbot_response(input_text, image, key, history or [])
+        return updated, updated
+    # chat_history is listed as an output as well: a State that is only ever
+    # an input keeps its initial [] value, so every submit would start from
+    # an empty conversation and replace what the chatbot showed before.
+    submit_button.click(
+        fn=_submit,
+        inputs=[user_input, image_input, key_input, chat_history],
+        outputs=[chatbot, chat_history],
+    )
+# Launch the app
+demo.launch()