Spaces:

JFoz
/

FociMapper

Running

App Files Community

JFoz committed on Aug 29, 2023

Commit

6756e43

1 Parent(s): 016c3b8

Show screening, permit screening distance to be changed

Browse files

Files changed (5) hide show

app.py +7 -4
path_analysis/analyse.py +50 -26
path_analysis/data_preprocess.py +40 -40
tests/test_analyse.py +77 -20
tests/test_preprocess.py +2 -2

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import numpy as np
 # Function to preview the imported image
 def preview_image(file1):
     if file1:
         im = imread(file1.name)
         print(im.ndim, im.shape)
         if im.ndim>2:
@@ -41,6 +42,7 @@ with gr.Blocks() as demo:
                 threshold_type = gr.Radio(["per-trace", "per-cell"], label="Threshold-type", value="per-trace", interactive=True)
                 use_corrected_positions = gr.Checkbox(label="Correct foci position measurements", value=True, interactive=True)
         # The output column showing the result of processing
@@ -52,15 +54,16 @@ with gr.Blocks() as demo:
             data_file_output=gr.File(label="Output data file (.csv)")
-    def process(cellid_input, image_input, path_input, sphere_radius, peak_threshold, xy_res, z_res, threshold_type, use_corrected_positions):
         config = { 'sphere_radius': sphere_radius,
                    'peak_threshold': peak_threshold,
                    'xy_res': xy_res,
                    'z_res': z_res,
                    'threshold_type': threshold_type,
-                   'use_corrected_positions': use_corrected_positions
-        }
         paths, traces, fig, extracted_peaks = analyse_paths(cellid_input, image_input.name, path_input.name, config)
@@ -71,7 +74,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         greet_btn = gr.Button("Process")
-        greet_btn.click(fn=process, inputs=[cellid_input, image_input, path_input, sphere_radius, peak_threshold, xy_res, z_res, threshold_type, use_corrected_positions], outputs=[trace_output, image_output, plot_output, data_output, data_file_output], api_name="process")
 if __name__ == "__main__":

 # Function to preview the imported image
 def preview_image(file1):
     if file1:
+        print('Uploading image', file1.name)
         im = imread(file1.name)
         print(im.ndim, im.shape)
         if im.ndim>2:
                 threshold_type = gr.Radio(["per-trace", "per-cell"], label="Threshold-type", value="per-trace", interactive=True)
                 use_corrected_positions = gr.Checkbox(label="Correct foci position measurements", value=True, interactive=True)
+                screening_distance = gr.Number(label='Screening distance (voxels)', value=10, interactive=True)
         # The output column showing the result of processing
             data_file_output=gr.File(label="Output data file (.csv)")
+    def process(cellid_input, image_input, path_input, sphere_radius, peak_threshold, xy_res, z_res, threshold_type, use_corrected_positions, screening_distance):
         config = { 'sphere_radius': sphere_radius,
                    'peak_threshold': peak_threshold,
                    'xy_res': xy_res,
                    'z_res': z_res,
                    'threshold_type': threshold_type,
+                   'use_corrected_positions': use_corrected_positions,
+                   'screening_distance': screening_distance,
+                  }
         paths, traces, fig, extracted_peaks = analyse_paths(cellid_input, image_input.name, path_input.name, config)
     with gr.Row():
         greet_btn = gr.Button("Process")
+        greet_btn.click(fn=process, inputs=[cellid_input, image_input, path_input, sphere_radius, peak_threshold, xy_res, z_res, threshold_type, use_corrected_positions, screening_distance], outputs=[trace_output, image_output, plot_output, data_output, data_file_output], api_name="process")
 if __name__ == "__main__":

path_analysis/analyse.py CHANGED Viewed

@@ -53,6 +53,7 @@ def calculate_path_length_partials(point_list, voxel_size=(1,1,1)):
     section_lengths = [0.0]
     s = np.array(voxel_size)
     for i in range(len(point_list)-1):
         section_lengths.append(la.norm(s * (np.array(point_list[i+1]) - np.array(point_list[i]))))
     return np.cumsum(section_lengths)
@@ -89,7 +90,7 @@ def visualise_ordering(points_list, dim, wr=5, wc=5):
 col_map = [(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255), (0,255,255),
            (255,127,0), (255, 0, 127), (127, 255, 0), (0, 255, 127), (127,0,255), (0,127,255)]
-def draw_paths(all_paths, foci_stack, foci_index=None, r=3):
     """
     Draws paths on the provided image stack and overlays markers for the foci
@@ -98,7 +99,7 @@ def draw_paths(all_paths, foci_stack, foci_index=None, r=3):
         foci_stack (np.array): 3D numpy array representing the image stack.
         foci_index (list, optional): List of list of focus indices (along each path). Defaults to None.
         r (int, optional): Radius for the ellipse or line drawing around the focus. Defaults to 3.
     Returns:
         PIL.Image.Image: An image with the drawn paths.
     """
@@ -110,13 +111,20 @@ def draw_paths(all_paths, foci_stack, foci_index=None, r=3):
     for i, (p, col) in enumerate(zip(all_paths, cycle(col_map))):
         draw.line([(u[0], u[1]) for u in p], fill=col)
         draw.text((p[0][0], p[0][1]), str(i+1), fill=col)
     if foci_index is not None:
         for i, (idx, p, col) in enumerate(zip(foci_index, all_paths, cycle(col_map))):
             if len(idx):
                 for j in idx:
                     draw.line((int(p[j][0]-r), int(p[j][1]), int(p[j][0]+r), int(p[j][1])), fill=col, width=2)
                     draw.line((int(p[j][0]), int(p[j][1]-r), int(p[j][0]), int(p[j][1]+r)), fill=col, width=2)
     return im
@@ -164,8 +172,7 @@ def make_mask_s(p, melem, measure_stack):
     #
     R = [u//2 for u in melem.shape]
     r, c, z = p
     mask = np.zeros(melem.shape)
@@ -210,7 +217,8 @@ def make_sphere(R=5, z_scale_ratio=2.3):
     Generate a binary representation of a sphere in 3D space.
     Args:
-        R (int, optional): Radius of the sphere. Default is 5.
         z_scale_ratio (float, optional): Scaling factor for the z-axis. Default is 2.3.
     Returns:
@@ -243,25 +251,26 @@ def measure_all_with_sphere(points_list, measure_stack, op='mean', R=5, z_scale_
 # Measure fluorescence levels along ordered skeleton
-def measure_chrom2(path, hei10, config):
     """
     Measure fluorescence levels along an ordered skeleton.
     Args:
         path (list): List of ordered path points (r, c, z).
-        hei10 (numpy.ndarray): 3D fluorescence data.
         config (dict): Configuration dictionary containing 'z_res', 'xy_res', and 'sphere_radius' values.
     Returns:
         tuple: A tuple containing the visualization, mean measurements, and max measurements along the path.
     """
     scale_ratio = config['z_res']/config['xy_res']
     sphere_xy_radius = int(math.ceil(config['sphere_radius']/config['xy_res']))
-    vis = visualise_ordering(path, dim=hei10.shape, wr=sphere_xy_radius, wc=sphere_xy_radius)
-    measurements = measure_all_with_sphere(path, hei10, op='mean', R=sphere_xy_radius, z_scale_ratio=scale_ratio)
-    measurements_max = measure_all_with_sphere(path, hei10, op='max', R=sphere_xy_radius, z_scale_ratio=scale_ratio)
     return vis, measurements, measurements_max
@@ -290,20 +299,22 @@ def extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config):
     n_paths = len(all_paths)
     data = []
-    foci_absolute_intensity, foci_position, foci_position_index, dominated_foci_data, trace_median_intensities, trace_thresholds = analyse_traces(all_paths, path_lengths, measured_traces, config)
     foci_intensities = []
     for path_foci_abs_int, tmi in zip(foci_absolute_intensity, trace_median_intensities):
         foci_intensities.extend(list(path_foci_abs_int - tmi))
     mean_intensity = np.mean(foci_intensities)
     trace_positions = []
     for i in range(n_paths):
         pl = calculate_path_length_partials(all_paths[i], (config['xy_res'], config['xy_res'], config['z_res']))
-        print(i, len(all_paths[i]), len(pl))
         path_data = { 'Cell_ID':cell_id,
                       'Trace': i+1,
@@ -311,17 +322,23 @@ def extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config):
                       'Measured_trace_length(um)': pl[-1],
                       'Trace_median_intensity': trace_median_intensities[i],
                       'Detection_sphere_radius(um)': config['sphere_radius'],
-                      'Foci_ID_threshold': config['peak_threshold'] }
         for j, (idx, u,v) in enumerate(zip(foci_position_index[i], foci_position[i], foci_absolute_intensity[i])):
             if config['use_corrected_positions']:
                 path_data[f'Foci_{j+1}_position(um)'] = pl[idx]
             else:
                 path_data[f'Foci_{j+1}_position(um)'] = u
             path_data[f'Foci_{j+1}_absolute_intensity'] = v
             path_data[f'Foci_{j+1}_relative_intensity'] = (v - trace_median_intensities[i])/mean_intensity
         data.append(path_data)
         trace_positions.append(pl)
-    return pd.DataFrame(data), foci_absolute_intensity, foci_position_index, dominated_foci_data, trace_thresholds, trace_positions
 def analyse_paths(cell_id,
@@ -344,24 +361,29 @@ def analyse_paths(cell_id,
     """
     foci_stack = tifffile.imread(foci_file)
     if foci_stack.ndim==2:
         foci_stack = foci_stack[None,:,:]
     all_paths, path_lengths = get_paths_from_traces_file(traces_file)
-    all_trace_vis = []
-    all_m = []
     for p in all_paths:
         vis, m, _ = measure_chrom2(p,foci_stack.transpose(2,1,0), config)
         all_trace_vis.append(vis)
         all_m.append(m)
-    extracted_peaks, foci_absolute_intensity, foci_pos_index, dominated_foci_data, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, all_m, config)
     n_cols = 2
     n_rows = (len(all_paths)+n_cols-1)//n_cols
     fig, ax = plt.subplots(n_rows,n_cols, figsize=(5*n_cols, 3*n_rows))
@@ -371,22 +393,24 @@ def analyse_paths(cell_id,
         ax[i].set_title(f'Trace {i+1}')
         ax[i].plot(trace_positions[i], m)
         if len(foci_pos_index[i]):
             ax[i].plot(trace_positions[i][foci_pos_index[i]], np.array(m)[foci_pos_index[i]], 'rx')
-        if len(dominated_foci_data[i]):
-            dominated_foci_pos_index = [u.idx for u in dominated_foci_data[i]]
-            ax[i].plot(trace_positions[i][dominated_foci_pos_index], np.array(m)[dominated_foci_pos_index], color=(0.5,0.5,0.5), marker='o', linestyle='None')
         if trace_thresholds[i] is not None:
             ax[i].axhline(trace_thresholds[i], c='r', ls=':')
         ax[i].set_xlabel('Distance from start (um)')
         ax[i].set_ylabel('Intensity')
     for i in range(len(all_m), n_cols*n_rows):
         ax[i].axis('off')
     plt.tight_layout()
-    trace_overlay = draw_paths(all_paths, foci_stack, foci_index=foci_pos_index)
     return trace_overlay, all_trace_vis, fig, extracted_peaks

     section_lengths = [0.0]
     s = np.array(voxel_size)
     for i in range(len(point_list)-1):
+        # Euclidean distance between successive points
         section_lengths.append(la.norm(s * (np.array(point_list[i+1]) - np.array(point_list[i]))))
     return np.cumsum(section_lengths)
 col_map = [(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255), (0,255,255),
            (255,127,0), (255, 0, 127), (127, 255, 0), (0, 255, 127), (127,0,255), (0,127,255)]
+def draw_paths(all_paths, foci_stack, foci_index=None, r=3, screened_foci_data=None):
     """
     Draws paths on the provided image stack and overlays markers for the foci
         foci_stack (np.array): 3D numpy array representing the image stack.
         foci_index (list, optional): List of list of focus indices (along each path). Defaults to None.
         r (int, optional): Radius for the ellipse or line drawing around the focus. Defaults to 3.
+        screened_foci_data (list, optional): List of RemovedPeakData for screened foci
     Returns:
         PIL.Image.Image: An image with the drawn paths.
     """
     for i, (p, col) in enumerate(zip(all_paths, cycle(col_map))):
         draw.line([(u[0], u[1]) for u in p], fill=col)
         draw.text((p[0][0], p[0][1]), str(i+1), fill=col)
+    if screened_foci_data is not None:
+        for i, removed_peaks in enumerate(screened_foci_data):
+            for p in removed_peaks:
+                u = all_paths[i][p.idx]
+                v = all_paths[p.screening_peak[0]][p.screening_peak[1]]
+                draw.line((int(u[0]), int(u[1]), int(v[0]), int(v[1])), fill=(127,127,127), width=2)
     if foci_index is not None:
         for i, (idx, p, col) in enumerate(zip(foci_index, all_paths, cycle(col_map))):
             if len(idx):
                 for j in idx:
                     draw.line((int(p[j][0]-r), int(p[j][1]), int(p[j][0]+r), int(p[j][1])), fill=col, width=2)
                     draw.line((int(p[j][0]), int(p[j][1]-r), int(p[j][0]), int(p[j][1]+r)), fill=col, width=2)
     return im
     #
     R = [u//2 for u in melem.shape]
     r, c, z = p
     mask = np.zeros(melem.shape)
     Generate a binary representation of a sphere in 3D space.
     Args:
+        R (int, optional): Radius of the sphere. Default is 5. Centred on the centre of the middle voxel.
+                           Includes all voxels whose centre is precisely R from the middle voxel.
         z_scale_ratio (float, optional): Scaling factor for the z-axis. Default is 2.3.
     Returns:
 # Measure fluorescence levels along ordered skeleton
+def measure_chrom2(path, intensity, config):
     """
     Measure fluorescence levels along an ordered skeleton.
     Args:
         path (list): List of ordered path points (r, c, z).
+        intensity (numpy.ndarray): 3D fluorescence data.
         config (dict): Configuration dictionary containing 'z_res', 'xy_res', and 'sphere_radius' values.
     Returns:
         tuple: A tuple containing the visualization, mean measurements, and max measurements along the path.
     """
+    # Calculate size of spheroid used for measurement
     scale_ratio = config['z_res']/config['xy_res']
     sphere_xy_radius = int(math.ceil(config['sphere_radius']/config['xy_res']))
+    vis = visualise_ordering(path, dim=intensity.shape, wr=sphere_xy_radius, wc=sphere_xy_radius)
+    measurements = measure_all_with_sphere(path, intensity, op='mean', R=sphere_xy_radius, z_scale_ratio=scale_ratio)
+    measurements_max = measure_all_with_sphere(path, intensity, op='max', R=sphere_xy_radius, z_scale_ratio=scale_ratio)
     return vis, measurements, measurements_max
     n_paths = len(all_paths)
     data = []
+    foci_absolute_intensity, foci_position, foci_position_index, screened_foci_data, trace_median_intensities, trace_thresholds = analyse_traces(all_paths, path_lengths, measured_traces, config)
+    # Normalize foci intensities (for quantification) using trace medians as estimates of background
     foci_intensities = []
     for path_foci_abs_int, tmi in zip(foci_absolute_intensity, trace_median_intensities):
         foci_intensities.extend(list(path_foci_abs_int - tmi))
+    # Divide all foci intensities by the mean within the cell
     mean_intensity = np.mean(foci_intensities)
     trace_positions = []
     for i in range(n_paths):
+        # Calculate real (Euclidean) distance of each point along the traced path
         pl = calculate_path_length_partials(all_paths[i], (config['xy_res'], config['xy_res'], config['z_res']))
         path_data = { 'Cell_ID':cell_id,
                       'Trace': i+1,
                       'Measured_trace_length(um)': pl[-1],
                       'Trace_median_intensity': trace_median_intensities[i],
                       'Detection_sphere_radius(um)': config['sphere_radius'],
+                      'Screening_distance(voxels)': config['screening_distance'],
+                      'Foci_ID_threshold': config['peak_threshold'],
+                      'Trace_foci_number': len(foci_position_index[i]) }
         for j, (idx, u,v) in enumerate(zip(foci_position_index[i], foci_position[i], foci_absolute_intensity[i])):
             if config['use_corrected_positions']:
+                # Use the calculated position along the traced path
                 path_data[f'Foci_{j+1}_position(um)'] = pl[idx]
             else:
+                # Use the measured trace length (from SNT), and assume all steps of path are approximately the same length
                 path_data[f'Foci_{j+1}_position(um)'] = u
+            # The original measured intensity (mean in spheroid around detected peak)
             path_data[f'Foci_{j+1}_absolute_intensity'] = v
+            # Measure relative intensity by removing per-trace background and dividing by cell total
             path_data[f'Foci_{j+1}_relative_intensity'] = (v - trace_median_intensities[i])/mean_intensity
         data.append(path_data)
         trace_positions.append(pl)
+    return pd.DataFrame(data), foci_absolute_intensity, foci_position_index, screened_foci_data, trace_thresholds, trace_positions
 def analyse_paths(cell_id,
     """
+    # Read stack
     foci_stack = tifffile.imread(foci_file)
+    # If 2D add additional (z) dimension
     if foci_stack.ndim==2:
         foci_stack = foci_stack[None,:,:]
     all_paths, path_lengths = get_paths_from_traces_file(traces_file)
+    all_trace_vis = [] # Per-path visualizations
+    all_m = [] # Per-path measured intensities
     for p in all_paths:
+        # Measure intensity along path - transpose the stack ZYX -> XYZ
         vis, m, _ = measure_chrom2(p,foci_stack.transpose(2,1,0), config)
         all_trace_vis.append(vis)
         all_m.append(m)
+    # Extract all data from paths and traces
+    extracted_peaks, foci_absolute_intensity, foci_pos_index, screened_foci_data, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, all_m, config)
+    # Plot per-path measured intensities and indicate foci
     n_cols = 2
     n_rows = (len(all_paths)+n_cols-1)//n_cols
     fig, ax = plt.subplots(n_rows,n_cols, figsize=(5*n_cols, 3*n_rows))
         ax[i].set_title(f'Trace {i+1}')
         ax[i].plot(trace_positions[i], m)
         if len(foci_pos_index[i]):
+            # Plot detected foci
             ax[i].plot(trace_positions[i][foci_pos_index[i]], np.array(m)[foci_pos_index[i]], 'rx')
+        if len(screened_foci_data[i]):
+            # Indicate screened foci by gray circles on plots
+            screened_foci_pos_index = [u.idx for u in screened_foci_data[i]]
+            ax[i].plot(trace_positions[i][screened_foci_pos_index], np.array(m)[screened_foci_pos_index], color=(0.5,0.5,0.5), marker='o', linestyle='None')
+        # Show per-trace intensity thresholds with red dotted lines
         if trace_thresholds[i] is not None:
             ax[i].axhline(trace_thresholds[i], c='r', ls=':')
         ax[i].set_xlabel('Distance from start (um)')
         ax[i].set_ylabel('Intensity')
+    # Hide excess plots
     for i in range(len(all_m), n_cols*n_rows):
         ax[i].axis('off')
     plt.tight_layout()
+    trace_overlay = draw_paths(all_paths, foci_stack, foci_index=foci_pos_index, screened_foci_data=screened_foci_data)
     return trace_overlay, all_trace_vis, fig, extracted_peaks

path_analysis/data_preprocess.py CHANGED Viewed

@@ -74,10 +74,10 @@ class RemovedPeakData(object):
     Attributes:
         idx (int): Index of peak along path
-        dominating_peak (tuple): (path_idx, position along path) for dominating peak
     """
     idx: int
-    dominating_peak: tuple
 @dataclass
 class PathData(object):
@@ -86,17 +86,17 @@ class PathData(object):
     This dataclass encapsulates information about the peaks,
     the defining points, the fluorescence values, and the path length of a specific path.
-    Attributes: peaks (list): List of peaks in the path (indicies of positions in points, o_hei10).
         removed_peaks (list): List of peaks in the path which have been removed because of a nearby larger peak
         points (list): List of points defining the path.
-        o_hei10 (list): List of (unnormalized) fluorescence intensity values along the path
         SC_length (float): Length of the path.
     """
     peaks: list
     removed_peaks: list
     points: list
-    o_hei10: list
     SC_length: float
 @dataclass
@@ -138,7 +138,7 @@ def find_peaks2(v, distance=5,  prominence=0.5):
     return n_peaks, _
-def process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence):
     """
     Process traces of cells to extract peak information and organize the data.
@@ -152,6 +152,7 @@ def process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence):
         path_lengths (list of float): List of path lengths corresponding to the provided paths.
         measured_trace_fluorescence (list of list of float): A list containing fluorescence
                                                             data corresponding to each path point.
     Returns:
         CellData: An object containing organized peak and path data for a given cell.
@@ -163,17 +164,17 @@ def process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence):
     cell_peaks = []
-    for points, o_hei10 in zip(all_paths, measured_trace_fluorescence):
         # For peak determination normalize each trace to have mean zero and s.d. 1
-        hei10_normalized = (o_hei10 - np.mean(o_hei10))/np.std(o_hei10)
         # Find peaks - these will be further refined later
-        p,_ = find_peaks2(hei10_normalized, distance=5,  prominence=0.5*np.std(hei10_normalized))
         peaks = np.array(p, dtype=np.int32)
         # Store peak data - using original values, not normalized ones
-        peak_mean_heights = [ o_hei10[u] for u in peaks ]
         peak_points = [ points[u] for u in peaks ]
         cell_peaks.append((peaks, peak_points, peak_mean_heights))
@@ -188,7 +189,7 @@ def process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence):
             to_thin.append(PeakData(pos=cell_peaks[k][1][u], intensity=cell_peaks[k][2][u], key=(k, u)))
     # Exclude any peak with a nearby brighter peak (on any SC)
-    removed_peaks, removed_larger_peaks = thin_peaks(to_thin, return_larger_peaks=True)
     # Clean up and remove these peaks
     new_cell_peaks = []
@@ -206,7 +207,7 @@ def process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence):
                 # What's the larger point?
                 idx = removed_peaks.index((path_idx, peak_idx))
                 larger_path, larger_idx = removed_larger_peaks[idx]
-                path_removed_peaks.append(RemovedPeakData(idx=path_peaks[peak_idx], dominating_peak=(larger_path, cell_peaks[larger_path][0][larger_idx])))
                 ###
         new_cell_peaks.append(path_retained_peaks)
@@ -215,15 +216,15 @@ def process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence):
     cell_peaks = new_cell_peaks
     pd_list = []
-    # Save peak positions, absolute HEI10 intensities, and length for each SC
     for k in range(len(all_paths)):
-        points, o_hei10 = all_paths[k], measured_trace_fluorescence[k]
         peaks = cell_peaks[k]
         removed_peaks = removed_cell_peaks[k]
-        pd = PathData(peaks=peaks, removed_peaks=removed_peaks, points=points, o_hei10=o_hei10, SC_length=path_lengths[k])
         pd_list.append(pd)
     cd = CellData(pathdata_list=pd_list)
@@ -235,7 +236,7 @@ alpha_max = 0.4
 # Criterion used for identifying peak as a focus - normalized (with mean and s.d.)
-# hei10 levels being above 0.4 time maximum peak level
 def focus_criterion(pos, v, alpha=alpha_max):
     """
     Identify and return positions where values in the array `v` exceed a certain threshold.
@@ -271,14 +272,14 @@ def analyse_celldata(cell_data, config):
             - foci_rel_intensity (list): List of relative intensities for the detected foci.
             - foci_pos (list): List of absolute positions of the detected foci.
             - foci_pos_index (list): List of indices of the detected foci.
-            - dominated_foci_data (list): List of RemovedPeakData indicating positions of removed peaks and the index of the larger peak
             - trace_median_intensities (list): Per-trace median intensity
             - trace_thresholds (list): Per-trace absolute threshold for calling peaks as foci
     """
     foci_abs_intensity = []
     foci_pos = []
     foci_pos_index = []
-    dominated_foci_data = []
     trace_median_intensities = []
     trace_thresholds = []
@@ -296,12 +297,11 @@ def analyse_celldata(cell_data, config):
             # Normalize extracted fluorescent intensities by subtracting mean (and dividing
             # by standard deviation - note that the latter should have no effect on the results).
-            h = np.array(path_data.o_hei10)
             h = h - np.mean(h)
             h = h/np.std(h)
             # Extract foci according to criterion
             foci_idx = focus_criterion(peaks, h[peaks], peak_threshold)
-            print('peaks', peaks, h[peaks], foci_idx, np.mean(path_data.o_hei10))
             #
             removed_peaks = path_data.removed_peaks
@@ -309,30 +309,30 @@ def analyse_celldata(cell_data, config):
             if len(peaks):
-                trace_thresholds.append((1-peak_threshold)*np.mean(path_data.o_hei10) + peak_threshold*np.max(np.array(path_data.o_hei10)[peaks]))
             else:
                 trace_thresholds.append(None)
             if len(removed_peaks):
                 if len(peaks):
-                    threshold = (1-peak_threshold)*np.mean(path_data.o_hei10) + peak_threshold*np.max(np.array(path_data.o_hei10)[peaks])
                 else:
                     threshold = float('-inf')
-                removed_peak_heights = np.array(path_data.o_hei10)[removed_peaks_idx]
-                dominated_foci_idx = np.where(removed_peak_heights>threshold)[0]
-                dominated_foci_data.append([removed_peaks[i] for i in dominated_foci_idx])
             else:
-                dominated_foci_data.append([])
             pos_abs = (foci_idx/len(path_data.points))*path_data.SC_length
             foci_pos.append(pos_abs)
-            foci_abs_intensity.append(np.array(path_data.o_hei10)[foci_idx])
             foci_pos_index.append(foci_idx)
-            trace_median_intensities.append(np.median(path_data.o_hei10))
     elif threshold_type == 'per-cell':
         """
@@ -343,7 +343,7 @@ def analyse_celldata(cell_data, config):
             # Normalize extracted fluorescent intensities by subtracting mean (and dividing
             # by standard deviation - note that the latter should have no effect on the results).
-            h = np.array(path_data.o_hei10)
             h = h - np.mean(h)
             max_cell_intensity = max(max_cell_intensity, np.max(h))
@@ -352,7 +352,7 @@ def analyse_celldata(cell_data, config):
             # Normalize extracted fluorescent intensities by subtracting mean (and dividing
             # by standard deviation - note that the latter should have no effect on the results).
-            h = np.array(path_data.o_hei10)
             h = h - np.mean(h)
             foci_idx = peaks[h[peaks]>peak_threshold*max_cell_intensity]
@@ -360,33 +360,33 @@ def analyse_celldata(cell_data, config):
             removed_peaks = path_data.removed_peaks
             removed_peaks_idx = np.array([u.idx for u in removed_peaks], dtype=np.int32)
-            trace_thresholds.append(np.mean(path_data.o_hei10) + peak_threshold*max_cell_intensity)
             if len(removed_peaks):
-                threshold = np.mean(path_data.o_hei10) + peak_threshold*max_cell_intensity
-                removed_peak_heights = np.array(path_data.o_hei10)[removed_peaks_idx]
-                dominated_foci_idx = np.where(removed_peak_heights>threshold)[0]
-                dominated_foci_data.append([removed_peaks[i] for i in dominated_foci_idx])
             else:
-                dominated_foci_data.append([])
             pos_abs = (foci_idx/len(path_data.points))*path_data.SC_length
             foci_pos.append(pos_abs)
-            foci_abs_intensity.append(np.array(path_data.o_hei10)[foci_idx])
             foci_pos_index.append(foci_idx)
-            trace_median_intensities.append(np.median(path_data.o_hei10))
     else:
         raise NotImplementedError
-    return foci_abs_intensity, foci_pos, foci_pos_index, dominated_foci_data, trace_median_intensities, trace_thresholds
 def analyse_traces(all_paths, path_lengths, measured_trace_fluorescence, config):
-    cd = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence)
     return analyse_celldata(cd, config)

     Attributes:
         idx (int): Index of peak along path
+        screening_peak (tuple): (path_idx, position along path) for screening peak
     """
     idx: int
+    screening_peak: tuple
 @dataclass
 class PathData(object):
     This dataclass encapsulates information about the peaks,
     the defining points, the fluorescence values, and the path length of a specific path.
     Attributes:
+        peaks (list): List of peaks in the path (indices of positions in points, o_intensity).
         removed_peaks (list): List of peaks in the path which have been removed because of a nearby larger peak
         points (list): List of points defining the path.
+        o_intensity (list): List of (unnormalized) fluorescence intensity values along the path
         SC_length (float): Length of the path.
     """
     peaks: list
     removed_peaks: list
     points: list
+    o_intensity: list
     SC_length: float
 @dataclass
     return n_peaks, _
+def process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence, dmin=10):
     """
     Process traces of cells to extract peak information and organize the data.
         path_lengths (list of float): List of path lengths corresponding to the provided paths.
         measured_trace_fluorescence (list of list of float): A list containing fluorescence
                                                             data corresponding to each path point.
+        dmin (float): Distance below which brighter peaks screen less bright ones.
     Returns:
         CellData: An object containing organized peak and path data for a given cell.
     cell_peaks = []
+    for points, o_intensity in zip(all_paths, measured_trace_fluorescence):
         # For peak determination normalize each trace to have mean zero and s.d. 1
+        intensity_normalized = (o_intensity - np.mean(o_intensity))/np.std(o_intensity)
         # Find peaks - these will be further refined later
+        p,_ = find_peaks2(intensity_normalized, distance=5,  prominence=0.5*np.std(intensity_normalized))
         peaks = np.array(p, dtype=np.int32)
         # Store peak data - using original values, not normalized ones
+        peak_mean_heights = [ o_intensity[u] for u in peaks ]
         peak_points = [ points[u] for u in peaks ]
         cell_peaks.append((peaks, peak_points, peak_mean_heights))
             to_thin.append(PeakData(pos=cell_peaks[k][1][u], intensity=cell_peaks[k][2][u], key=(k, u)))
     # Exclude any peak with a nearby brighter peak (on any SC)
+    removed_peaks, removed_larger_peaks = thin_peaks(to_thin, return_larger_peaks=True, dmin=dmin)
     # Clean up and remove these peaks
     new_cell_peaks = []
                 # What's the larger point?
                 idx = removed_peaks.index((path_idx, peak_idx))
                 larger_path, larger_idx = removed_larger_peaks[idx]
+                path_removed_peaks.append(RemovedPeakData(idx=path_peaks[peak_idx], screening_peak=(larger_path, cell_peaks[larger_path][0][larger_idx])))
                 ###
         new_cell_peaks.append(path_retained_peaks)
     cell_peaks = new_cell_peaks
     pd_list = []
+    # Save peak positions, absolute intensities, and length for each SC
     for k in range(len(all_paths)):
+        points, o_intensity = all_paths[k], measured_trace_fluorescence[k]
         peaks = cell_peaks[k]
         removed_peaks = removed_cell_peaks[k]
+        pd = PathData(peaks=peaks, removed_peaks=removed_peaks, points=points, o_intensity=o_intensity, SC_length=path_lengths[k])
         pd_list.append(pd)
     cd = CellData(pathdata_list=pd_list)
 # Criterion used for identifying peak as a focus - normalized (with mean and s.d.)
+# intensity levels being above 0.4 times the maximum peak level
 def focus_criterion(pos, v, alpha=alpha_max):
     """
     Identify and return positions where values in the array `v` exceed a certain threshold.
             - foci_rel_intensity (list): List of relative intensities for the detected foci.
             - foci_pos (list): List of absolute positions of the detected foci.
             - foci_pos_index (list): List of indices of the detected foci.
+            - screened_foci_data (list): List of RemovedPeakData indicating positions of removed peaks and the index of the larger peak
             - trace_median_intensities (list): Per-trace median intensity
             - trace_thresholds (list): Per-trace absolute threshold for calling peaks as foci
     """
     foci_abs_intensity = []
     foci_pos = []
     foci_pos_index = []
+    screened_foci_data = []
     trace_median_intensities = []
     trace_thresholds = []
             # Normalize extracted fluorescent intensities by subtracting mean (and dividing
             # by standard deviation - note that the latter should have no effect on the results).
+            h = np.array(path_data.o_intensity)
             h = h - np.mean(h)
             h = h/np.std(h)
             # Extract foci according to criterion
             foci_idx = focus_criterion(peaks, h[peaks], peak_threshold)
             #
             removed_peaks = path_data.removed_peaks
             if len(peaks):
+                trace_thresholds.append((1-peak_threshold)*np.mean(path_data.o_intensity) + peak_threshold*np.max(np.array(path_data.o_intensity)[peaks]))
             else:
                 trace_thresholds.append(None)
             if len(removed_peaks):
                 if len(peaks):
+                    threshold = (1-peak_threshold)*np.mean(path_data.o_intensity) + peak_threshold*np.max(np.array(path_data.o_intensity)[peaks])
                 else:
                     threshold = float('-inf')
+                removed_peak_heights = np.array(path_data.o_intensity)[removed_peaks_idx]
+                screened_foci_idx = np.where(removed_peak_heights>threshold)[0]
+                screened_foci_data.append([removed_peaks[i] for i in screened_foci_idx])
             else:
+                screened_foci_data.append([])
             pos_abs = (foci_idx/len(path_data.points))*path_data.SC_length
             foci_pos.append(pos_abs)
+            foci_abs_intensity.append(np.array(path_data.o_intensity)[foci_idx])
             foci_pos_index.append(foci_idx)
+            trace_median_intensities.append(np.median(path_data.o_intensity))
     elif threshold_type == 'per-cell':
         """
             # Normalize extracted fluorescent intensities by subtracting mean (and dividing
             # by standard deviation - note that the latter should have no effect on the results).
+            h = np.array(path_data.o_intensity)
             h = h - np.mean(h)
             max_cell_intensity = max(max_cell_intensity, np.max(h))
             # Normalize extracted fluorescent intensities by subtracting mean (and dividing
             # by standard deviation - note that the latter should have no effect on the results).
+            h = np.array(path_data.o_intensity)
             h = h - np.mean(h)
             foci_idx = peaks[h[peaks]>peak_threshold*max_cell_intensity]
             removed_peaks = path_data.removed_peaks
             removed_peaks_idx = np.array([u.idx for u in removed_peaks], dtype=np.int32)
+            trace_thresholds.append(np.mean(path_data.o_intensity) + peak_threshold*max_cell_intensity)
             if len(removed_peaks):
+                threshold = np.mean(path_data.o_intensity) + peak_threshold*max_cell_intensity
+                removed_peak_heights = np.array(path_data.o_intensity)[removed_peaks_idx]
+                screened_foci_idx = np.where(removed_peak_heights>threshold)[0]
+                screened_foci_data.append([removed_peaks[i] for i in screened_foci_idx])
             else:
+                screened_foci_data.append([])
             pos_abs = (foci_idx/len(path_data.points))*path_data.SC_length
             foci_pos.append(pos_abs)
+            foci_abs_intensity.append(np.array(path_data.o_intensity)[foci_idx])
             foci_pos_index.append(foci_idx)
+            trace_median_intensities.append(np.median(path_data.o_intensity))
     else:
         raise NotImplementedError
+    return foci_abs_intensity, foci_pos, foci_pos_index, screened_foci_data, trace_median_intensities, trace_thresholds
 def analyse_traces(all_paths, path_lengths, measured_trace_fluorescence, config):
+    cd = process_cell_traces(all_paths, path_lengths, measured_trace_fluorescence, dmin=config['screening_distance'])
     return analyse_celldata(cd, config)

tests/test_analyse.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import pytest
 from path_analysis.analyse import *
 import numpy as np
 from math import pi
 import xml.etree.ElementTree as ET
@@ -99,7 +100,7 @@ def test_get_paths_from_traces_file():
 def test_measure_chrom2():
     # Mock data
     path = [(2, 3, 4), (4, 5, 6), (9, 9, 9)]  # Sample ordered path points
-    hei10 = np.random.rand(10, 10, 10)  # Random 3D fluorescence data
     config = {
         'z_res': 1,
         'xy_res': 0.5,
@@ -107,7 +108,7 @@ def test_measure_chrom2():
     }
     # Function call
-    _, measurements, measurements_max = measure_chrom2(path, hei10, config)
     # Assertions
     assert len(measurements) == len(path), "Measurements length should match path length"
@@ -118,7 +119,7 @@ def test_measure_chrom2():
 def test_measure_chrom2_z():
     # Mock data
     path = [(2, 3, 4), (4, 5, 6)]  # Sample ordered path points
-    _,_,hei10 = np.meshgrid(np.arange(10), np.arange(10), np.arange(10))  # 3D fluorescence data - z dependent
     config = {
         'z_res': 1,
         'xy_res': 0.5,
@@ -126,7 +127,7 @@ def test_measure_chrom2_z():
     }
     # Function call
-    _, measurements, measurements_max = measure_chrom2(path, hei10, config)
     # Assertions
     assert len(measurements) == len(path), "Measurements length should match path length"
@@ -137,7 +138,7 @@ def test_measure_chrom2_z():
 def test_measure_chrom2_z2():
     # Mock data
     path = [(0,0,0), (2, 3, 4), (4, 5, 6)]  # Sample ordered path points
-    _,_,hei10 = np.meshgrid(np.arange(10), np.arange(10), np.arange(10))  # 3D fluorescence data - z dependent
     config = {
         'z_res': 0.25,
         'xy_res': 0.5,
@@ -145,7 +146,7 @@ def test_measure_chrom2_z2():
     }
     # Function call
-    _, measurements, measurements_max = measure_chrom2(path, hei10, config)
     # Assertions
     assert len(measurements) == len(path), "Measurements length should match path length"
@@ -283,31 +284,87 @@ def test_make_sphere_equal():
 import pandas as pd
-# 1. Test basic functionality
 def test_extract_peaks_basic():
-    cell_id = 1
-    all_paths = [[[0, 0], [1, 1]]]
     path_lengths = [1.41]  # length of the above path
     measured_traces = [[100, 200]]  # fluorescence along the path
-    config = {'peak_threshold': 0.4, 'sphere_radius': 2, 'xy_res': 1, 'z_res': 1, 'use_corrected_positions': True}
-    df, foci_abs_int, foci_pos_idx, _, _, _ = extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config)
-    # Now add your assertions to validate the result
     assert len(df) == 1, "Expected one row in DataFrame"
     assert df['Cell_ID'].iloc[0] == cell_id, "Unexpected cell_id"
-    # Add more assertions here based on expected values
-# 2. Test multiple paths
 def test_extract_peaks_multiple_paths():
     cell_id = 1
-    all_paths = [[[0, 0], [1, 1]], [[1, 1], [2, 2]]]
     path_lengths = [1.41, 1.41]
     measured_traces = [[100, 200], [100, 150]]
-    config = {'peak_threshold': 0.4, 'sphere_radius': 2, 'xy_res': 1, 'z_res': 1, 'use_corrected_positions': True}
-    df, _, _, _, _, _ = extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config)
     assert len(df) == 2, "Expected two rows in DataFrame"
-    # Add more assertions here

 import pytest
 from path_analysis.analyse import *
+from path_analysis.data_preprocess import RemovedPeakData
 import numpy as np
 from math import pi
 import xml.etree.ElementTree as ET
 def test_measure_chrom2():
     # Mock data
     path = [(2, 3, 4), (4, 5, 6), (9, 9, 9)]  # Sample ordered path points
+    intensity = np.random.rand(10, 10, 10)  # Random 3D fluorescence data
     config = {
         'z_res': 1,
         'xy_res': 0.5,
     }
     # Function call
+    _, measurements, measurements_max = measure_chrom2(path, intensity, config)
     # Assertions
     assert len(measurements) == len(path), "Measurements length should match path length"
 def test_measure_chrom2_z():
     # Mock data
     path = [(2, 3, 4), (4, 5, 6)]  # Sample ordered path points
+    _,_,intensity = np.meshgrid(np.arange(10), np.arange(10), np.arange(10))  # 3D fluorescence data - z dependent
     config = {
         'z_res': 1,
         'xy_res': 0.5,
     }
     # Function call
+    _, measurements, measurements_max = measure_chrom2(path, intensity, config)
     # Assertions
     assert len(measurements) == len(path), "Measurements length should match path length"
 def test_measure_chrom2_z2():
     # Mock data
     path = [(0,0,0), (2, 3, 4), (4, 5, 6)]  # Sample ordered path points
+    _,_,intensity = np.meshgrid(np.arange(10), np.arange(10), np.arange(10))  # 3D fluorescence data - z dependent
     config = {
         'z_res': 0.25,
         'xy_res': 0.5,
     }
     # Function call
+    _, measurements, measurements_max = measure_chrom2(path, intensity, config)
     # Assertions
     assert len(measurements) == len(path), "Measurements length should match path length"
 import pandas as pd
 def test_extract_peaks_basic():
+    cell_id = 1 # Simple per-cell tag
+    all_paths = [[[0, 0, 0], [1, 1, 0]]] # Single, simple path
     path_lengths = [1.41]  # length of the above path
     measured_traces = [[100, 200]]  # fluorescence along the path
+    config = {'peak_threshold': 0.4, 'sphere_radius': 2, 'xy_res': 1, 'z_res': 1, 'threshold_type':'per-cell', 'use_corrected_positions': True, 'screening_distance':10 }
+    df, foci_absolute_intensity, foci_pos_index, screened_foci_data, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config)
     assert len(df) == 1, "Expected one row in DataFrame"
     assert df['Cell_ID'].iloc[0] == cell_id, "Unexpected cell_id"
+    assert list(df['Trace_foci_number']) == [1], "Wrong foci number"
+    assert df['Foci_1_position(um)'].iloc[0] == np.sqrt(2)
+    assert foci_pos_index == [[1]]
+    assert foci_absolute_intensity == [[200]]
+    assert screened_foci_data == [[]]
+    assert trace_thresholds == [ [ 150+0.4*50] ]
+    assert np.all(trace_positions[0] ==  np.array([0, np.sqrt(2)]))
 def test_extract_peaks_multiple_paths():
     cell_id = 1
+    all_paths = [[[0, 0, 0], [1, 1, 0]], [[1, 1, 200], [2, 2, 200]]]
+    path_lengths = [1.41, 1.41]
+    measured_traces = [[100, 200], [100, 140]]
+    config = {'peak_threshold': 0.4, 'sphere_radius': 2, 'xy_res': 1, 'z_res': 1, 'threshold_type':'per-trace', 'use_corrected_positions': True, 'screening_distance':10 }
+    df, foci_absolute_intensity, foci_pos_index, screened_foci_data, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config)
+    assert len(df) == 2, "Expected two rows in DataFrame"
+    assert df['Cell_ID'].iloc[0] == cell_id, "Unexpected cell_id"
+    assert list(df['Trace_foci_number']) == [1,1], "Wrong foci number"
+    assert df['Foci_1_position(um)'].iloc[0] == np.sqrt(2)
+    print(foci_pos_index)
+    assert list(map(list, foci_pos_index)) == [[1],[1]]
+    assert list(map(list, foci_absolute_intensity)) == [[200],[140]]
+    assert trace_thresholds == [ 150+0.4*50, 120+0.4*20 ]
+    assert np.all(trace_positions[0] ==  np.array([0, np.sqrt(2)]))
+    assert screened_foci_data == [[],[]]
+def test_extract_peaks_multiple_paths_screened():
+    cell_id = 1
+    all_paths = [[[0, 0, 0], [1, 1, 0]], [[1, 1, 2], [2, 2, 2]]]
     path_lengths = [1.41, 1.41]
     measured_traces = [[100, 200], [100, 150]]
+    config = {'peak_threshold': 0.4, 'sphere_radius': 2, 'xy_res': 1, 'z_res': 1, 'threshold_type':'per-trace', 'use_corrected_positions': True, 'screening_distance':10 }
+    df, foci_absolute_intensity, foci_pos_index, screened_foci_data, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config)
     assert len(df) == 2, "Expected two rows in DataFrame"
+    assert df['Cell_ID'].iloc[0] == cell_id, "Unexpected cell_id"
+    assert list(df['Trace_foci_number']) == [1,0], "Wrong foci number"
+    assert df['Foci_1_position(um)'].iloc[0] == np.sqrt(2)
+    print(foci_pos_index)
+    assert list(map(list, foci_pos_index)) == [[1],[]]
+    assert list(map(list, foci_absolute_intensity)) == [[200],[]]
+    assert trace_thresholds == [ 150+0.4*50, None ]
+    assert np.all(trace_positions[0] ==  np.array([0, np.sqrt(2)]))
+    assert screened_foci_data == [[],[RemovedPeakData(idx=1, screening_peak=(0,1))]]
+def test_extract_peaks_multiple_paths_per_cell():
+    cell_id = 1
+    all_paths = [[[0, 0, 0], [1, 1, 0]], [[1, 1, 200], [2, 2, 200]]]
+    path_lengths = [1.41, 1.41]
+    measured_traces = [[100, 200], [100, 140]]
+    config = {'peak_threshold': 0.4, 'sphere_radius': 2, 'xy_res': 1, 'z_res': 1, 'threshold_type':'per-cell', 'use_corrected_positions': True, 'screening_distance':10 }
+    df, foci_absolute_intensity, foci_pos_index, screened_foci_data, trace_thresholds, trace_positions = extract_peaks(cell_id, all_paths, path_lengths, measured_traces, config)
+    assert len(df) == 2, "Expected two rows in DataFrame"
+    assert df['Cell_ID'].iloc[0] == cell_id, "Unexpected cell_id"
+    assert list(df['Trace_foci_number']) == [1,0], "Wrong foci number"
+    assert df['Foci_1_position(um)'].iloc[0] == np.sqrt(2)
+    assert list(map(list, foci_pos_index)) == [[1],[]]
+    assert list(map(list, foci_absolute_intensity)) == [[200],[]]
+    assert trace_thresholds == [ 150+0.4*50, 120+0.4*50 ]
+    assert np.all(trace_positions[0] ==  np.array([0, np.sqrt(2)]))
+    assert screened_foci_data == [[],[]]

tests/test_preprocess.py CHANGED Viewed

@@ -128,8 +128,8 @@ def test_process_cell_traces_peaks(mock_data):
 # Mock data
 @pytest.fixture
 def mock_celldata():
-    pathdata1 = PathData(peaks=[0, 5], points=[(0,0,0), (0,2,0), (0,5,0), (0,10,0), (0,15,0), (0,20,0)], removed_peaks=[], o_hei10=[100, 8, 3, 2, 3, 69], SC_length=2.2)
-    pathdata2 = PathData(peaks=[2], points=[(1,20,0), (1,20,10), (1,20,20) ], removed_peaks=[RemovedPeakData(0, (0,5))], o_hei10=[38, 2, 20], SC_length=2.3)
     return CellData(pathdata_list=[pathdata1, pathdata2])
 def test_analyse_celldata(mock_celldata):

 # Mock data
 @pytest.fixture
 def mock_celldata():
+    pathdata1 = PathData(peaks=[0, 5], points=[(0,0,0), (0,2,0), (0,5,0), (0,10,0), (0,15,0), (0,20,0)], removed_peaks=[], o_intensity=[100, 8, 3, 2, 3, 69], SC_length=2.2)
+    pathdata2 = PathData(peaks=[2], points=[(1,20,0), (1,20,10), (1,20,20) ], removed_peaks=[RemovedPeakData(0, (0,5))], o_intensity=[38, 2, 20], SC_length=2.3)
     return CellData(pathdata_list=[pathdata1, pathdata2])
 def test_analyse_celldata(mock_celldata):