Spaces:

RioJune
/

AG-KD

Sleeping

App Files Files Community

RioJune committed on Mar 1

Commit

a40ac25

1 Parent(s): fba28b5

update

Browse files

Files changed (9) hide show

app.py +386 -0
configs/experiment.yaml +36 -0
configs/padchest_definition.yaml +24 -0
configs/vindr_definition.yaml +22 -0
examples/26746130963764173994750391023442607773-2_mukhp1.png +0 -0
examples/f1eb2216d773ced6330b1f31e18f04f8.png +0 -0
examples/fb4dfacc089f4b5550f03f52e706b6f2.png +0 -0
examples/prompt.yaml +8 -0
requirements.txt +11 -0

app.py ADDED Viewed

	@@ -0,0 +1,386 @@

+import streamlit as st
+from PIL import Image
+import torch
+from transformers import AutoModelForCausalLM, AutoProcessor
+import numpy as np
+import supervision as sv
+import albumentations as A
+import cv2
+from transformers import AutoConfig
+import yaml
+# Set Streamlit page configuration for a wide layout.
+st.set_page_config(layout="wide")
+# Inject custom CSS for better layout and mobile responsiveness. The stylesheet is sent as raw
+# HTML, which is why unsafe_allow_html=True is required. It constrains .main / .block-container
+# and declares the helper classes .title, .subheader, .btn, .column-spacing, .col-half,
+# .col-full and .instructions.
+st.markdown("""
+    <style>
+        .main {
+            max-width: 1200px;  /* Max width for content */
+            margin: 0 auto;
+        }
+        .block-container {
+            padding-top: 2rem;
+            padding-bottom: 2rem;
+            padding-left: 3rem;
+            padding-right: 3rem;
+        }
+        .title {
+            font-size: 2.5rem;
+            text-align: center;
+            color: #FF6347;
+        }
+        .subheader {
+            font-size: 1.5rem;
+            margin-bottom: 20px;
+        }
+        .btn {
+            font-size: 1.1rem;
+            padding: 10px 20px;
+            background-color: #FF6347;
+            color: white;
+            border-radius: 5px;
+            border: none;
+            cursor: pointer;
+        }
+        .btn:hover {
+            background-color: #FF4500;
+        }
+        .column-spacing {
+            display: flex;
+            justify-content: space-between;
+        }
+        .col-half {
+            width: 48%;
+        }
+        .col-full {
+            width: 100%;
+        }
+        .instructions {
+            padding: 20px;
+            background-color: #f9f9f9;
+            border-radius: 8px;
+            box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
+        }
+    </style>
+""", unsafe_allow_html=True)
+# Load Model and Processor
+@st.cache_resource
+def load_model():
+    REVISION = 'refs/pr/6'
+    MODEL_NAME = "RioJune/AG-KD"
+    # MODEL_NAME = '/u/home/lj0/Checkpoints/AD-KD-MICCAI25'
+    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    config_model = AutoConfig.from_pretrained ("microsoft/Florence-2-base-ft", trust_remote_code=True)
+    config_model.vision_config.model_type = "davit"
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True, config=config_model).to(DEVICE)
+    BASE_PROCESSOR = "microsoft/Florence-2-base-ft"
+    processor = AutoProcessor.from_pretrained(BASE_PROCESSOR, trust_remote_code=True)
+    processor.image_processor.size = 512
+    processor.image_processor.crop_size = 512
+    return model, processor, DEVICE
+model, processor, DEVICE = load_model()
+# Load Definitions
+@st.cache_resource
+def load_definitions():
+    vindr_path = 'configs/vindr_definition.yaml'
+    padchest_path = 'configs/padchest_definition.yaml'
+    prompt_path = 'examples/prompt.yaml'
+    with open(vindr_path, 'r') as file:
+        vindr_definitions = yaml.safe_load(file)
+    with open(padchest_path, 'r') as file:
+        padchest_definitions = yaml.safe_load(file)
+    with open(prompt_path, 'r') as file:
+        prompt_definitions = yaml.safe_load(file)
+    return vindr_definitions, padchest_definitions, prompt_definitions
+vindr_definitions, padchest_definitions, prompt_definitions = load_definitions()
+dataset_options = {"Vindr": vindr_definitions, "PadChest": padchest_definitions}
+def load_example_images():
+    return list(prompt_definitions.keys())
+example_images = load_example_images()
+def apply_transform(image, size_mode=512):
+    pad_resize_transform = A.Compose([
+        A.LongestMaxSize(max_size=size_mode, interpolation=cv2.INTER_AREA),
+        A.PadIfNeeded(min_height=size_mode, min_width=size_mode, border_mode=cv2.BORDER_CONSTANT, value=(0, 0, 0)),
+        A.Resize(height=512, width=512, interpolation=cv2.INTER_AREA),
+    ])
+    image_np = np.array(image)
+    transformed = pad_resize_transform(image=image_np)
+    return transformed["image"]
+# Streamlit UI with Colorful Title and Emojis
+st.markdown("<h1 class='title'>🩺 Enhancing Abnormality Grounding for Vision Language Models with Knowledge Descriptions 🚀</h1>", unsafe_allow_html=True)
+st.markdown(
+    "<p style='text-align: center; font-size: 18px;'>Welcome to a simple demo of our work! 🎉 Choose an example or upload your own image to get started! 👇</p>",
+    unsafe_allow_html=True
+)
+# Display Example Images First
+st.subheader("🌄 Example Images")
+selected_example = st.selectbox("Choose an example", example_images)
+image = Image.open(selected_example).convert("RGB")
+example_diseases = prompt_definitions.get(selected_example, [])
+st.write("**Associated Diseases:**", ", ".join(example_diseases))
+# Layout for Original Image and Instructions
+col1, col2 = st.columns([1, 2])
+# Left column for original image
+with col1:
+    st.image(image, caption=f"Original Example Image: {selected_example}", width=400)
+# Right column for Instructions and Run Inference Button
+with col2:
+    st.subheader("⚙️ Instructions to Get Started:")
+    st.write("""
+        - **Run Inference**: Click the "Run Inference on Example" button to process the image and display the results.
+        - **Choose an Example**: 🌄 Select an example image from the dataset to view its associated diseases.
+        - **Upload Your Own Image**: 📤 Upload an image of your choice to analyze it for diseases.
+        - **Select Dataset**: 📚 Choose between available datasets (Vindr or PadChest) for disease information.
+        - **Select Disease**: 🦠 Pick the disease to be analyzed from the list of diseases in the selected dataset.
+    """)
+    st.subheader("⚠️ Warning:")
+    st.write("""
+    - **🚫 Please avoid uploading non-frontal chest X-ray images.** Our model has been specifically trained on **frontal chest X-ray images** only.
+    - This demo is intended for **🔬 research purposes only** and should **❌ not be used for medical diagnoses**.
+    - The model’s responses may contain **<span style='color:#dc3545; font-weight:bold;'>🤖 hallucinations or incorrect information</span>**.
+    - Always consult a **<span style='color:#dc3545; font-weight:bold;'>👨‍⚕️ medical professional</span>** for accurate diagnosis and advice.
+""", unsafe_allow_html=True)
+    st.markdown("</div>", unsafe_allow_html=True)
+# Run Inference Button
+if st.button("Run Inference on Example", key="example"):
+    if image is None:
+        st.error("❌ Please select an example image first.")
+    else:
+        # Use the selected example's disease and definition for inference
+        disease_choice = example_diseases[0] if example_diseases else ""
+        definition = vindr_definitions.get(disease_choice, padchest_definitions.get(disease_choice, ""))
+        # Generate the prompt for the model
+        det_obj = f"{disease_choice} means {definition}."
+        st.write(f"**Definition:** {definition}")
+        prompt = f"Locate the phrases in the caption: {det_obj}."
+        prompt = f"<CAPTION_TO_PHRASE_GROUNDING>{prompt}"
+        # Prepare the image and input
+        np_image = np.array(image)
+        inputs = processor(text=[prompt], images=[np_image], return_tensors="pt", padding=True).to(DEVICE)
+        with st.spinner("Processing... ⏳"):
+            outputs = model.generate(
+                input_ids=inputs["input_ids"],
+                pixel_values=inputs["pixel_values"],
+                max_new_tokens=1024,
+                num_beams=3,
+                output_scores=True,  # Make sure we get the scores/logits
+                return_dict_in_generate=True  # Ensures you get both sequences and scores in the output
+                )
+            # Ensure transition_scores is properly extracted
+            transition_scores = model.compute_transition_scores(
+                outputs.sequences, outputs.scores, outputs.beam_indices, normalize_logits=False
+            )
+            # Get the generated token IDs (ignoring the input tokens part)
+            generated_ids = outputs.sequences
+            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+            # Get input length
+            input_length = inputs.input_ids.shape[1]
+            generated_tokens = outputs.sequences
+            # Calculate output length (number of generated tokens)
+            output_length = np.sum(transition_scores.cpu().numpy() < 0, axis=1)
+            # Get length penalty
+            length_penalty = model.generation_config.length_penalty
+            # Calculate total score for the generated sentence
+            reconstructed_scores = transition_scores.cpu().sum(axis=1) / (output_length**length_penalty)
+            # Convert log-probability to probability (0-1 range)
+            probabilities = np.exp(reconstructed_scores.cpu().numpy())
+            # Streamlit UI to display the result
+            st.markdown(f"**🎯 Probability of the Results:** <span style='color:#28a745; font-size:24px; font-weight:bold;'>{probabilities[0] * 100:.2f}%</span>", unsafe_allow_html=True)
+            predictions = processor.post_process_generation(generated_text, task="<CAPTION_TO_PHRASE_GROUNDING>", image_size=np_image.shape[:2])
+            detection = sv.Detections.from_lmm(sv.LMM.FLORENCE_2, predictions, resolution_wh=np_image.shape[:2])
+            # Annotate the image with bounding boxes and labels
+            bounding_box_annotator = sv.BoundingBoxAnnotator(color_lookup=sv.ColorLookup.INDEX)
+            label_annotator = sv.LabelAnnotator(color_lookup=sv.ColorLookup.INDEX)
+            image_with_predictions = bounding_box_annotator.annotate(np_image.copy(), detection)
+            image_with_predictions = label_annotator.annotate(image_with_predictions, detection)
+            annotated_image = Image.fromarray(image_with_predictions.astype(np.uint8))
+            # Display the original and result images side by side
+            col1, col2 = st.columns([1, 1])
+            with col1:
+                st.image(image, caption=f"Original Image: {selected_example}", width=400)
+            with col2:
+                st.image(annotated_image, caption="Inference Results 🖼️", width=400)
+            # Display the generated text
+            st.write("**Generated Text:**", generated_text)
+# Upload Image section
+st.subheader("📤 Upload Your Own Image")
+col1, col2 = st.columns([1, 1])
+with col1:
+    dataset_choice = st.selectbox("Select Dataset 📚", options=list(dataset_options.keys()))
+    disease_options = list(dataset_options[dataset_choice].keys())
+with col2:
+    disease_choice = st.selectbox("Select Disease 🦠", options=disease_options)
+uploaded_file = st.file_uploader("Upload an Image", type=["png", "jpg", "jpeg"])
+col1, col2 = st.columns([1, 2])
+with col1:
+    # Handle file upload
+    if uploaded_file:
+        image = Image.open(uploaded_file).convert("RGB")
+        image = apply_transform(image)  # Ensure the uploaded image is transformed correctly
+        st.image(image, caption="Uploaded Image", width=400)
+        # Let user select dataset and disease dynamically
+        disease_choice = disease_choice if disease_choice else example_diseases[0]
+        # Get Definition Priority: Dataset -> User Input
+        definition = vindr_definitions.get(disease_choice, padchest_definitions.get(disease_choice, ""))
+        if not definition:
+            definition = st.text_input("Enter Definition Manually 📝", value="")
+with col2:
+    # Instructions and warnings
+    st.subheader("⚙️ Instructions to Get Started:")
+    st.write("""
+        - **Run Inference**: Click the "Run Inference on Example" button to process the image and display the results.
+        - **Choose an Example**: 🌄 Select an example image from the dataset to view its associated diseases.
+        - **Upload Your Own Image**: 📤 Upload an image of your choice to analyze it for diseases.
+        - **Select Dataset**: 📚 Choose between available datasets (Vindr or PadChest) for disease information.
+        - **Select Disease**: 🦠 Pick the disease to be analyzed from the list of diseases in the selected dataset.
+    """)
+    st.subheader("⚠️ Warning:")
+    st.write("""
+    - **🚫 Please avoid uploading non-frontal chest X-ray images.** Our model has been specifically trained on **frontal chest X-ray images** only.
+    - This demo is intended for **🔬 research purposes only** and should **❌ not be used for medical diagnoses**.
+    - The model’s responses may contain **<span style='color:#dc3545; font-weight:bold;'>🤖 hallucinations or incorrect information</span>**.
+    - Always consult a **<span style='color:#dc3545; font-weight:bold;'>👨‍⚕️ medical professional</span>** for accurate diagnosis and advice.
+""", unsafe_allow_html=True)
+# Run inference after upload
+if st.button("Run Inference 🏃‍♂️"):
+    if image is None:
+        st.error("❌ Please upload an image or select an example.")
+    else:
+        det_obj = f"{disease_choice} means {definition}."
+        st.write(f"**Definition:** {definition}")
+        # Construct Prompt with Disease Definition
+        prompt = f"Locate the phrases in the caption: {det_obj}."
+        prompt = f"<CAPTION_TO_PHRASE_GROUNDING>{prompt}"
+        np_image = np.array(image)
+        inputs = processor(text=[prompt], images=[np_image], return_tensors="pt", padding=True).to(DEVICE)
+        with st.spinner("Processing... ⏳"):
+            # generated_ids = model.generate(input_ids=inputs["input_ids"], pixel_values=inputs["pixel_values"], max_new_tokens=1024, num_beams=3)
+            # generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+            outputs = model.generate(
+                input_ids=inputs["input_ids"],
+                pixel_values=inputs["pixel_values"],
+                max_new_tokens=1024,
+                num_beams=3,
+                output_scores=True,  # Make sure we get the scores/logits
+                return_dict_in_generate=True  # Ensures you get both sequences and scores in the output
+                )
+            transition_scores = model.compute_transition_scores(
+                outputs.sequences, outputs.scores, outputs.beam_indices, normalize_logits=False
+            )
+            # Get the generated token IDs (ignoring the input tokens part)
+            generated_ids = outputs.sequences
+            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+            # Get input length
+            input_length = inputs.input_ids.shape[1]
+            # Extract generated tokens (ignoring the input tokens)
+            # generated_tokens = outputs.sequences[:, input_length:]
+            generated_tokens = outputs.sequences
+            # Calculate output length (number of generated tokens)
+            output_length = np.sum(transition_scores.cpu().numpy() < 0, axis=1)
+            # Get length penalty
+            length_penalty = model.generation_config.length_penalty
+            # Calculate total score for the generated sentence
+            reconstructed_scores = transition_scores.cpu().sum(axis=1) / (output_length**length_penalty)
+            # Convert log-probability to probability (0-1 range)
+            probabilities = np.exp(reconstructed_scores.cpu().numpy())
+            # Streamlit UI to display the result
+            # st.write(f"**Probability of the Results (0-1):** {probabilities[0]:.4f}")
+            st.markdown(f"**🎯 Probability of the Results:** <span style='color:green; font-size:24px; font-weight:bold;'>{probabilities[0] * 100:.2f}%</span>", unsafe_allow_html=True)
+            predictions = processor.post_process_generation(generated_text, task="<CAPTION_TO_PHRASE_GROUNDING>", image_size=np_image.shape[:2])
+            detection = sv.Detections.from_lmm(sv.LMM.FLORENCE_2, predictions, resolution_wh=np_image.shape[:2])
+            bounding_box_annotator = sv.BoundingBoxAnnotator(color_lookup=sv.ColorLookup.INDEX)
+            label_annotator = sv.LabelAnnotator(color_lookup=sv.ColorLookup.INDEX)
+            image_with_predictions = bounding_box_annotator.annotate(np_image.copy(), detection)
+            image_with_predictions = label_annotator.annotate(image_with_predictions, detection)
+            annotated_image = Image.fromarray(image_with_predictions.astype(np.uint8))
+            # Create two columns to display the original and the results side by side
+            col1, col2 = st.columns([1, 1])
+            # Left column for original image
+            with col1:
+                st.image(image, caption="Uploaded Image", width=400)
+            # Right column for result image
+            with col2:
+                st.image(annotated_image, caption="Inference Results 🖼️", width=400)
+            # Display the generated text
+            st.write("**Generated Text:**", generated_text)

configs/experiment.yaml ADDED Viewed

	@@ -0,0 +1,36 @@

+# Experiment 1 Configuration
+# Model section: base checkpoint, processor input size and PEFT/LoRA switches.
+model:
+  model_type: "microsoft/Florence-2-base-ft"
+  lora_config: "configs/lora_config.yaml"
+  init_checkpoint: "checkpoints/mimic_model_init.pt"
+  processor:
+    image_size: 512
+    crop_size: 512
+  peft:
+    use_peft: false
+    # YAML null. A bare `None` is parsed as the string "None", not as a missing value.
+    # NOTE(review): confirm the training code does not compare against the literal "None".
+    lora_checkpoint: null
+  finetune: true
+# Trainer section: logging targets, batch sizes, hardware and optimiser settings.
+trainer:
+  checkpoint_dir: "../outputs"
+  project_name: "Knowledge-AG" # change to your own wandb project name
+  entity_name: "compai"  # change to your own wandb entity name
+  max_epochs: 50
+  train_batch_size: 16
+  valid_batch_size: 16
+  num_workers: 28
+  log_every_n_steps: 100
+  gpu: 0
+  ddp: true
+  optimizer: "adamw"
+  # Written with a decimal point on purpose: YAML 1.1 loaders such as PyYAML read `3e-6`
+  # as a string, while `3.0e-6` is parsed as a float. Previous value: 5e-6.
+  learning_rate: 3.0e-6
+  weight_decay: 0.01
+# Dataset section: one entry per dataset with its image root and annotation CSV.
+# NOTE(review): both paths are absolute and machine-specific; adjust them for your environment.
+dataset:
+  vindr:
+    img_root: "/vol/ciamspace/datasets/X-ray/vindr-cxr/processed/images_512/"
+    annotation_csv: "/u/home/lj0/Code/AG-KD-miccai25/annotations/vindr_dataset.csv"
+    # presumably the fraction of the dataset to use (1.0 = all) — TODO confirm against the loader
+    data_pct: 1.0

configs/padchest_definition.yaml ADDED Viewed

	@@ -0,0 +1,24 @@

+# PadChest finding definitions: finding name -> one-sentence radiographic description.
+# app.py loads this mapping with yaml.safe_load, offers its keys as the "PadChest" disease list
+# and inserts the selected key and value into the prompt as "<finding> means <definition>.".
+# NOTE(review): "scoliosis" contains a double space and "endotracheal tube" ends with trailing
+# spaces inside the quotes. Both are left untouched because the values are used verbatim in
+# the model prompt; confirm before normalising.
+pleural thickening: "Increased thickness of the pleura seen as a dense layer around the lung."
+atelectasis: "Collapsed lung tissue causing darkened or shrunken areas in the lung."
+pleural effusion: "Excess fluid in the pleural space appearing as a shadow around the lungs."
+cardiomegaly: "Enlargement of the heart seen when the heart appears larger than normal."
+aortic elongation: "Lengthened and tortuous aorta, visible as an elongated curving structure."
+vertebral degenerative changes: "Irregular vertebral margins with bony sclerosis and osteophytes."
+aortic atheromatosis: "Calcified deposits in the aortic wall appearing as bright, irregular opacities."
+nodule: "A growth or lump in the lung which may appear as a well-defined or irregular shape."
+alveolar pattern: "Cloud-like, patchy opacities representing fluid or cellular accumulation in alveoli."
+hiatal hernia: "A soft-tissue mass or air-fluid level above the diaphragm, near the midline."
+scoliosis: "Sideways curvature of the spine causing  misalignment of vertebral bodies."
+hemidiaphragm elevation: "One side of the diaphragm appearing higher than the other, with convex shape."
+hyperinflated lung: "Abnormally increased lung volume with expanded air spaces."
+interstitial pattern: "Fine reticular or nodular opacities spread across the lung, indicating interstitial involvement."
+fracture: "A break in the bone appearing as a radiolucent line or displacement."
+vascular hilar enlargement: "Increased prominence of the pulmonary vessels near the lung hila."
+nsg tube: "A thin radiopaque tube extending from the nasal cavity into the stomach."
+endotracheal tube: "A thin or opaque line in the middle of the trachea.  "
+hypoexpansion: "Reduced lung inflation with increased density and narrow intercostal spaces."
+central venous catheter: "A visible line inside large vein."
+electrical device: "A dense, well-defined metallic opacity, typically a pacemaker or defibrillator."
+bronchiectasis: "Dilated bronchi with thick walls, appearing as tubular or cystic opacities."
+goiter: "A soft tissue mass in the anterior neck, sometimes displacing the trachea."
+other entities: "An unusual mass or area in the lung with irregular borders or density."

configs/vindr_definition.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+# VinDr-CXR finding definitions: finding name -> one-sentence radiographic description.
+# app.py loads this mapping with yaml.safe_load, offers its keys as the "Vindr" disease list
+# and inserts the selected key and value into the prompt as "<finding> means <definition>.".
+# The "ild" entry contains a non-ASCII apostrophe, so the file must be read as UTF-8.
+lung opacity: "An area of increased density in the lung fields typically appearing as a white or grayish patch."
+infiltration: "Accumulation of substances or cells in the lung tissue visible as increased density or nodules."
+consolidation: "Lung tissue filled with fluid or cells causing dense solid areas on imaging."
+nodule or mass: "A growth or lump in the lung which may appear as a well-defined or irregular shape."
+pleural thickening: "Increased thickness of the pleura seen as a dense layer around the lung."
+aortic enlargement: "Widening of the aorta visible as an enlarged artery on imaging."
+pulmonary fibrosis: "Scarring of the lung tissue creating a dense fibrous appearance."
+ild: "Scarring or inflammation of the lung’s interstitial tissue creating a reticular or nodular pattern."
+cardiomegaly: "Enlargement of the heart seen when the heart appears larger than normal."
+other lesion: "An unusual mass or area in the lung with irregular borders or density."
+pleural effusion: "Excess fluid in the pleural space appearing as a shadow around the lungs."
+calcification: "Calcium deposits in lung tissue visible as bright white spots."
+enlarged pa: "Widening of the pulmonary artery seen as an enlarged artery in the chest."
+lung cavity: "Air-filled spaces within the lung often surrounded by dense tissue."
+atelectasis: "Collapsed lung tissue causing darkened or shrunken areas in the lung."
+mediastinal shift: "Displacement of central chest structures like the heart to one side."
+lung cyst: "Fluid-filled spaces in the lung often round with thin walls."
+pneumothorax: "Air trapped in the pleural space creating a gap or absence of lung tissue."
+emphysema: "Enlarged air spaces in the lungs appearing over-expanded or damaged."
+clavicle fracture: "A break in the collarbone seen as a gap or irregularity in the bone."
+rib fracture: "A break in one or more ribs appearing as a visible crack or displacement."
+edema: "Fluid accumulation in the lungs creating a hazy or clouded area."

examples/26746130963764173994750391023442607773-2_mukhp1.png ADDED Viewed

examples/f1eb2216d773ced6330b1f31e18f04f8.png ADDED Viewed

examples/fb4dfacc089f4b5550f03f52e706b6f2.png ADDED Viewed

examples/prompt.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+# Demo examples: image path -> list of findings associated with that image.
+# app.py lists the keys in the "Choose an example" selectbox, opens the selected key with PIL
+# and uses the first listed finding when "Run Inference on Example" is clicked.
+./examples/26746130963764173994750391023442607773-2_mukhp1.png:
+  - electrical device
+./examples/f1eb2216d773ced6330b1f31e18f04f8.png:
+  - pulmonary fibrosis
+./examples/fb4dfacc089f4b5550f03f52e706b6f2.png:
+  - cardiomegaly

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+# Runtime dependencies for app.py. All entries are unpinned.
+# NOTE(review): einops and timm are not imported by app.py directly; presumably they are
+# required by the Florence-2 remote model code loaded with trust_remote_code=True.
+# Confirm before removing them.
+streamlit
+torch
+transformers
+pillow
+numpy
+supervision
+albumentations
+opencv-python
+pyyaml
+einops
+timm