Spaces:

Samhugs07
/

Perl-To-Python

Running

File size: 8,217 Bytes

651ddf8

#!/usr/bin/env python
# coding: utf-8

# # Perl to Python Code Generator
# 
# The requirement: use a Frontier model to generate high performance Python code from Perl code
# 

# In[ ]:


#get_ipython().system('pip install -q transformers huggingface_hub')


# In[ ]:

# imports

import os
import io
import sys
from dotenv import load_dotenv
from openai import OpenAI
import google.generativeai
import anthropic
import gradio as gr
import subprocess
import json
#for Hugging face end points
from huggingface_hub import login, InferenceClient
from transformers import AutoTokenizer


# In[ ]:


# environment

# Load settings through python-dotenv before the keys are read below.
load_dotenv(override=True)
# Re-export each credential into os.environ; when a variable is missing the
# placeholder string 'your-key-if-not-using-env' is stored in its place.
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')
# Token for connecting to the Hugging Face endpoint (also passed to
# InferenceClient in stream_code_qwen).
hf_token = os.environ['HF_TOKEN']
# NOTE(review): login() runs at import time, and receives the placeholder
# string when HF_TOKEN is unset - confirm that this is the intended behaviour.
login(hf_token, add_to_git_credential=True)


# In[ ]:


# initialize
# NOTE - to use ultra-low cost models, uncomment the two alternative model
# lines in the "keep costs ultra-low" section below

# API clients, constructed without explicit arguments.
# NOTE(review): presumably both pick their API keys up from the environment
# variables exported earlier in this file - confirm against the SDK docs.
openai = OpenAI()
claude = anthropic.Anthropic()
# Model identifiers passed to the respective clients by stream_gpt / stream_claude.
OPENAI_MODEL = "gpt-4o"
CLAUDE_MODEL = "claude-3-5-sonnet-20240620"

# Want to keep costs ultra-low? Uncomment these lines:
#OPENAI_MODEL = "gpt-4o-mini"
#CLAUDE_MODEL = "claude-3-haiku-20240307"

# To access open-source models through Hugging Face endpoints.
# code_qwen names the tokenizer loaded in stream_code_qwen; code_gemma and
# CODE_GEMMA_URL are not referenced elsewhere in the visible part of this file.
code_qwen = "Qwen/CodeQwen1.5-7B-Chat"
code_gemma = "google/codegemma-7b-it"
CODE_QWEN_URL = "https://u9pv0u7a6uxrjubt.us-east-1.aws.endpoints.huggingface.cloud" # Paste your endpoint URL from Hugging Face
CODE_GEMMA_URL = "https://c5hggiyqachmgnqg.us-east-1.aws.endpoints.huggingface.cloud" # Paste your endpoint URL from Hugging Face


# In[ ]:


# System prompt used for every backend (via messages_for and stream_claude).
# It restricts the model to code-only answers. Every fragment except the last
# ends with a space so the sentences do not run together when concatenated.
system_message = (
    "You are an assistant that reimplements Perl scripts code into a high performance Python for a Windows 11 PC. "
    "Respond only with Python code; use comments sparingly and do not provide any explanation other than occasional comments preceded by a # tag. "
    "The Python response needs to produce an identical output in the fastest possible time."
)


# In[ ]:


def user_prompt_for(perl):
    """Build the user-turn prompt asking the model to port a Perl script.

    Args:
        perl: Source text of the Perl script to convert.

    Returns:
        The fixed instruction text, a blank line, then the Perl source.
    """
    # Implicit literal concatenation instead of a backslash continuation
    # inside the string: the continuation used to embed the next source
    # line's indentation in the prompt. "import" replaces the C-style
    # "#include" wording, which does not apply to Python.
    instructions = (
        "Rewrite this Perl scripts code in Python with the fastest possible implementation that produces identical output in the least time. "
        "Respond only with Python code; do not explain your work other than a few comments. "
        "Pay attention to number types to ensure no int overflows. "
        "Remember to import all necessary python libraries as needed, such as requests, os, json etc.\n\n"
    )
    return instructions + perl


# In[ ]:


def messages_for(perl):
    """Return the chat transcript (system turn, then user turn) for *perl*.

    The system turn carries the module-level ``system_message``; the user turn
    carries the prompt produced by ``user_prompt_for(perl)``.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": user_prompt_for(perl)}
    return [system_turn, user_turn]


# In[ ]:


# write to a file

def write_output(python, file_path):
    """Write generated Python code to ``<stem>.py`` in the working directory.

    Args:
        python: Generated code; any Markdown code-fence markers are stripped.
        file_path: Path of the uploaded Perl script. Only its base name,
            without the extension, is used to name the output file.

    Returns:
        The name of the file that was written (a bare file name, so it is
        created relative to the current working directory).
    """
    stem = os.path.splitext(os.path.basename(file_path))[0]
    code = python.replace("```python", "").replace("```", "")
    output_file = f"{stem}.py"
    # Explicit UTF-8: without it open() uses the locale's default encoding,
    # which can fail on non-ASCII characters in the generated code.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(code)
    return output_file


# In[ ]:


def stream_gpt(perl, file_path):
    """Stream a Perl-to-Python conversion from the OpenAI chat model.

    Args:
        perl: Perl source to convert.
        file_path: Path of the uploaded Perl file, forwarded to
            ``write_output`` to name the generated script.

    Yields:
        ``(code_so_far, None)`` after each processed chunk, then one final
        ``(code, output_file)`` pair once the full reply has been written.
    """
    stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(perl), stream=True)
    reply = ""
    # Bound before the loop so an empty stream cannot leave it undefined
    # when the final yield runs.
    cleaned_reply = ""
    for chunk in stream:
        # Skip chunks that carry no choices instead of failing on choices[0].
        if not chunk.choices:
            continue
        fragment = chunk.choices[0].delta.content or ""
        reply += fragment
        # Strip Markdown fences from the accumulated text on every update so
        # the textbox always shows plain code.
        cleaned_reply = reply.replace('```python\n','').replace('```','')
        yield cleaned_reply, None
    yield cleaned_reply, write_output(cleaned_reply, file_path)
        


# In[ ]:


def stream_claude(perl, file_path):
    """Stream a Perl-to-Python conversion from the Anthropic model.

    Args:
        perl: Perl source to convert.
        file_path: Path of the uploaded Perl file, forwarded to
            ``write_output`` to name the generated script.

    Yields:
        ``(code_so_far, None)`` after each text fragment, then one final
        ``(code, output_file)`` pair once the full reply has been written.
    """
    result = claude.messages.stream(
        model=CLAUDE_MODEL,
        max_tokens=2000,
        system=system_message,
        messages=[{"role": "user", "content": user_prompt_for(perl)}],
    )
    reply = ""
    # Bound before the loop so an empty stream still has a value to emit.
    cleaned_reply = ""
    with result as stream:
        for text in stream.text_stream:
            reply += text
            # The cleanup and yield belong inside the loop: placed after it,
            # the caller only saw the text once the whole reply had arrived.
            cleaned_reply = reply.replace('```python\n','').replace('```','')
            yield cleaned_reply, None
    yield cleaned_reply, write_output(cleaned_reply, file_path)


# In[ ]:


def stream_code_qwen(perl, file_path):
    """Stream a Perl-to-Python conversion from the CodeQwen endpoint.

    The chat messages are rendered to a single prompt string with the model's
    chat template, then sent to the endpoint at ``CODE_QWEN_URL``.

    Args:
        perl: Perl source to convert.
        file_path: Path of the uploaded Perl file, forwarded to
            ``write_output`` to name the generated script.

    Yields:
        ``(code_so_far, None)`` after each generated token, then one final
        ``(code, output_file)`` pair once the full reply has been written.
    """
    # NOTE(review): the tokenizer is loaded on every call; consider caching it
    # if conversion latency matters.
    tokenizer = AutoTokenizer.from_pretrained(code_qwen)
    prompt_text = tokenizer.apply_chat_template(messages_for(perl), tokenize=False, add_generation_prompt=True)
    client = InferenceClient(CODE_QWEN_URL, token=hf_token)
    stream = client.text_generation(prompt_text, stream=True, details=True, max_new_tokens=3000)
    result = ""
    # Bound before the loop so an empty stream cannot leave it undefined
    # when the final yield runs.
    cleaned_reply = ""
    for r in stream:
        result += r.token.text
        cleaned_reply = result.replace('```python\n','').replace('```','')
        yield cleaned_reply, None
    yield cleaned_reply, write_output(cleaned_reply, file_path)


# In[ ]:


def generate(perl_script, model, file_path):
    """Stream a conversion of *perl_script* using the selected backend.

    Args:
        perl_script: Perl source to convert.
        model: One of ``"GPT"``, ``"Claude"`` or ``"CodeQwen"``.
        file_path: Path of the uploaded Perl file, forwarded to the backend.

    Yields:
        The ``(code, file)`` pairs produced by the backend streamer, unchanged.

    Raises:
        ValueError: If *model* is not a known backend name (raised when the
            generator is first advanced).
    """
    # Resolve the streamer lazily, branch by branch, so that only the selected
    # backend's function is looked up.
    if model == "GPT":
        streamer = stream_gpt
    elif model == "Claude":
        streamer = stream_claude
    elif model == "CodeQwen":
        streamer = stream_code_qwen
    else:
        raise ValueError("Unknown model")
    # Delegate directly: the previous per-branch loops re-yielded the last
    # item a second time and failed with NameError on an empty stream.
    yield from streamer(perl_script, file_path)


# In[ ]:


def execute_perl(perl_code):
    """Run Perl source with the ``perl`` executable found on PATH.

    Args:
        perl_code: Perl program text, handed to the interpreter via ``-e``.

    Returns:
        The captured standard output. When the interpreter exits with a
        non-zero status its standard error is appended, so a failing script
        shows its error message instead of an unexplained empty result.
    """
    # Argument list (no shell) so the program text is passed through verbatim.
    result = subprocess.run(["perl", "-e", perl_code], capture_output=True, text=True)
    if result.returncode != 0:
        return result.stdout + result.stderr
    return result.stdout
    


# In[ ]:


def execute_python(code):
    """Execute Python source text and return what it printed to stdout.

    Args:
        code: Python program text to run.

    Returns:
        Everything the program wrote to ``sys.stdout`` while it ran.

    Raises:
        Exception: Whatever the executed code raises propagates to the caller;
            stdout is restored first.
    """
    buffer = io.StringIO()
    # Remember the stream that is active now: it is not necessarily
    # sys.__stdout__ (notebooks and test runners install their own).
    previous_stdout = sys.stdout
    # Run in a dedicated globals dict. With a bare exec() inside a function,
    # top-level definitions land in a locals mapping, so functions defined by
    # the program cannot see each other. "__main__" keeps
    # `if __name__ == "__main__":` guards in the program active.
    namespace = {"__name__": "__main__"}
    sys.stdout = buffer
    try:
        # SECURITY: this executes arbitrary code (model output or text typed
        # into the UI) with this process's privileges. Only run it in a
        # trusted or sandboxed environment.
        exec(code, namespace)
    finally:
        sys.stdout = previous_stdout
    return buffer.getvalue()


# In[ ]:


# Stylesheet handed to gr.Blocks(css=...): background colours for elements
# carrying the "perl" and "python" classes, which the UI assigns to the two
# result text areas through elem_classes.
css = """
.perl {background-color: #093645;}
.python {background-color: #0948;}
"""

# JavaScript handed to gr.Blocks(js=...): when the page URL's `__theme` query
# parameter is not 'dark', it sets the parameter and navigates to the new URL.
force_dark_mode = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""


# In[ ]:


# Build the Gradio interface: components are declared first, inside the
# Blocks/Row context managers, and the event handlers are wired up afterwards.
with gr.Blocks(css=css, js=force_dark_mode) as ui:

    gr.HTML("<h2 style='text-align: center; color: white;'> PERL to Python Code Generator</h2>")
    # Controls: backend choice, Perl upload, convert button, and a download
    # slot that starts hidden.
    with gr.Row(scale=0, equal_height=True):
        model = gr.Dropdown(["GPT", "Claude", "CodeQwen"], label="Select model", value="GPT")
        perl_file = gr.File(label="Upload Perl Script:")
        convert = gr.Button("Convert to Python")
        file_output = gr.File(label="Download Python script", visible=False)
    # Source and converted code.
    with gr.Row():
        perl_script = gr.Textbox(label="Perl Script:")
        python_script = gr.Textbox(label="Converted Python Script:")        
    # Run buttons for each language.
    with gr.Row():
        perl_run = gr.Button("Run PERL")
        python_run = gr.Button("Run Python")
    # Output of each run; the classes match the selectors in `css`.
    with gr.Row():
        perl_out = gr.TextArea(label="PERL Result:", elem_classes=["perl"])
        python_out = gr.TextArea(label="Python Result:", elem_classes=["python"])
    with gr.Row():        
        clear_button = gr.Button("Clear")
    
    def extract_perl_code(file):
        """Return the text of the uploaded file, read as UTF-8.

        Returns the string "No file uploaded." when *file* is None.
        """
        if file is None:
            return "No file uploaded."
        with open(file.name, "r", encoding="utf-8") as f:
            perl_code = f.read()
        return perl_code

    # Convert pipeline: load the upload into the Perl textbox, stream the
    # conversion into the Python textbox / download slot, then reveal the
    # download slot.
    # NOTE(review): with no upload, "No file uploaded." is placed in the Perl
    # textbox and the chain appears to continue into generate() with that
    # text and a None file - confirm whether the chain should stop instead.
    convert.click(extract_perl_code, inputs=[perl_file], outputs=[perl_script]).then(
        generate, inputs=[perl_script, model, perl_file], outputs=[python_script, file_output]).then(
        lambda file_output: gr.update(visible=True), inputs=[file_output], outputs=[file_output]
    )

    # Each run button executes the current textbox content and shows its output.
    perl_run.click(execute_perl, inputs=[perl_script], outputs=[perl_out])
    python_run.click(execute_python, inputs=[python_script], outputs=[python_out]) 

    def clear_all():
        """Return reset values, in the order of the `outputs` list below."""
        return None, "", "", gr.update(visible=False), "", ""

    clear_button.click(
        clear_all,
        outputs=[perl_file, perl_script, python_script, file_output, perl_out, python_out]
    )

# Start the app at import time; inbrowser=True is passed to launch().
ui.launch(inbrowser=True)


# In[ ]: