"""Functions for calculating array similarity metrics for text"""

import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import minimize

from scold import draw
from scold import arr_sim
from scold import utils

# Measures for which a smaller value means a better match. Every other
# measure is treated as a similarity (larger is better).
_DISTANCE_MEASURES = ('px_dist', 'partial_wasserstein')


def _wrap_degrees(angles):
    """Express angles (in degrees) in the interval (-180, 180].

    Each angle is replaced by the equivalent angle (modulo 360) with the
    smallest absolute value. An angle of exactly 180 stays at +180.

    Parameters
    ----------
    angles : array_like
        Angles in degrees.

    Returns
    -------
    ndarray
        Float array of the same shape as `angles`.
    """
    wrapped = np.asarray(angles, dtype=float) % 360
    return np.where(wrapped > 180, wrapped - 360, wrapped)


def text_arr_sim(a, b=None, font_a='arial.ttf', font_b='arial.ttf',
                 b_arr=None, measure='jaccard', translate=True,
                 fliplr=False, flipud=False, size=100, scale_val=1.0,
                 rotate_val=0.0, plot=False,
                 partial_wasserstein_kwargs={'scale_mass':True, 'mass_normalise':True, 'distance_normalise':True, 'translation':'opt', 'n_startvals':7, 'solver':'Nelder-Mead', 'search_method':'grid'},
                 **kwargs):
    """Calculate similarity metrics for two strings of text, translated to achieve optimal overlap.

    Parameters
    ----------
    a : str
    b : str, optional
        Must be defined if `b_arr` is not. Ignored if `b_arr` is defined.
    font_a : str, optional
        `.ttf` font to use to build text array from `a`
    font_b : str, optional
        `.ttf` font to use to build text array from `b`
    b_arr : ndarray, optional
        Array that the array built from `a` will be compared to. This option
        is included as it is faster to pre-build the array that `a` will be
        compared to, if applying this function in a loop. If both `b` and
        `b_arr` are defined, `b` will be ignored.
    measure : str
        Which measure to maximise (or minimise in the case of distance
        measures) to find the optimal overlap between arrays. Possible
        options are any metrics calculated by `arr_sim.arr_sim()`.
    translate : bool
        Should translation be optimised? If `False`, will just calculate
        similarities for default positions. If `True`, will use 2D
        cross-correlation for all measures except Wasserstein distance, which
        can use nonlinear optimisation via
        arr_sim.partial_wasserstein_trans(). That function also supports 2D
        cross-correlation for comparability.
    fliplr : bool
        Should the text built from `a` be mirrored horizontally?
    flipud : bool
        Should the text built from `a` be mirrored vertically?
    size : int
        The size of the text to draw.
    scale_val : float
        This is multiplied by `size` to calculate the size of the text array
        built from `a`, rounded to the nearest pixel. If `b_arr` is not
        pre-built, `b_arr` is built at size `size`, such that `scale_val`
        says how many times bigger `a_arr` should be than `b_arr`.
    rotate_val : float
        Degrees by which the text built from `a` should be rotated.
    plot : bool
        Should the solution be plotted?
    partial_wasserstein_kwargs : dict
        kwargs to be passed to arr_sim.partial_wasserstein() or
        arr_sim.partial_wasserstein_trans(). The dictionary is only read
        here, never modified.
    **kwargs
        Other arguments to pass to `draw.text_array()`.

    Returns
    -------
    dict
        Returns a dictionary with the similarity metrics calculated in
        `arr_sim.arr_sim()`, with an additional entry, `'shift'`, which
        contains the optimal translation values calculated by
        `arr_sim.translate_ov`, in form `(x, y)`.

    Raises
    ------
    ValueError
        If `measure` is `'partial_wasserstein'` and `translate` disagrees
        with `partial_wasserstein_kwargs['translation']` (translation
        requested but no method given, or a method given but translation not
        requested).

    Examples
    --------
    >>> text_arr_sim('d', 'p')
    {'jaccard': 0.5915244261330195, 'shift': (1, 19)}

    >>> text_arr_sim('d', 'c')
    {'jaccard': 0.6390658174097664, 'shift': (1, 0)}

    >>> text_arr_sim('d', 'p', measure='partial_wasserstein',
    ...              partial_wasserstein_kwargs={'scale_mass':True, 'mass_normalise':True, 'distance_normalise':True, 'translation':'crosscor', 'n_startvals':7, 'solver':'Nelder-Mead', 'search_method':'grid'})
    {'partial_wasserstein': 0.11199633634025599, 'shift': (1, 19)}

    >>> text_arr_sim('d', 'p', measure='partial_wasserstein',
    ...              partial_wasserstein_kwargs={'scale_mass':True, 'mass_normalise':True, 'distance_normalise':True, 'translation':'opt', 'n_startvals':7, 'solver':'Nelder-Mead', 'search_method':'grid'})
    {'partial_wasserstein': 0.07609689664769317, 'shift': (5.0, 11.0)}
    """
    a_arr = draw.text_array(a, font=font_a, rotate=rotate_val,
                            fliplr=fliplr, flipud=flipud,
                            size=size*scale_val, **kwargs)

    # faster to pre-define and give as argument if using the same b text in a loop
    if b_arr is None:
        b_arr = draw.text_array(b, font=font_b, rotate=0, fliplr=False,
                                flipud=False, size=size, **kwargs)

    is_wasserstein = measure == 'partial_wasserstein'

    if is_wasserstein:
        # NOTE(review): this lookup raises KeyError when the dictionary has no
        # 'translation' key, which is the case for the default
        # `partial_wasserstein_kwargs` of the opt_* functions in this module -
        # confirm which default is intended.
        trans_method = partial_wasserstein_kwargs['translation']
        if translate and trans_method is None:
            raise ValueError('Translation was requested, but the Partial Wasserstein translation method was not specified')
        if not translate and trans_method is not None:
            raise ValueError('Translation was not requested, but a Partial Wasserstein translation method was specified')

    if translate and not is_wasserstein:
        # align via cross-correlation; `shift` is in array index order
        a_aligned, b_aligned, shift = arr_sim.translate_ov(
            a_arr, b_arr, return_first_only=True)
    else:
        # no alignment here: either translation is off, or (for partial
        # Wasserstein) the shift is reported by arr_sim.arr_sim() below.
        # Just ensure same size with zero-padding.
        a_aligned, b_aligned = utils.pad_for_translation(a_arr, b_arr, pad=False)
        if not translate:
            shift = (0, 0)

    if plot:
        # `a` in the red channel, `b` in the blue channel
        rgb_arr = np.zeros(list(a_aligned.shape) + [3])
        rgb_arr[:, :, 0] = a_aligned
        rgb_arr[:, :, 2] = b_aligned
        plt.imshow(utils.crop_zeros(rgb_arr), interpolation='none')

    # get the similarity measures
    arr_sim_out = arr_sim.arr_sim(
        a_aligned, b_aligned, measure=measure,
        partial_wasserstein_kwargs=partial_wasserstein_kwargs)

    sim_res = {}
    if is_wasserstein:
        sim_res[measure] = arr_sim_out['metric']
        shift = arr_sim_out['trans']
    else:
        sim_res[measure] = arr_sim_out

    # flip the order of shift, as the array indices (x, y) refer to (y, x) in the image
    sim_res['shift'] = (shift[1], shift[0])

    return sim_res


def _opt_text_arr_sim_flip_manual(a='a', b=None, font_a='arial.ttf',
                                  font_b='arial.ttf', b_arr=None,
                                  measure='jaccard', translate=True,
                                  scale=True, rotate=True, fliplr=False,
                                  flipud=False, size=100,
                                  rotation_bounds=(-np.inf, np.inf),
                                  max_scale_change_factor=2.0,
                                  rotation_eval_n=9, scale_eval_n=9,
                                  solver='Nelder-Mead', search_method='grid',
                                  plot=False,
                                  partial_wasserstein_kwargs={'scale_mass':True, 'mass_normalise':True, 'distance_normalise': True, 'ins_weight':0.0, 'del_weight':0.0},
                                  **kwargs):
    """Find parameters for geometric operations of translation, scale, and rotation that maximise overlap between two arrays of drawn text.

    Parameters
    ----------
    a : str
    b : str, optional
        Must be defined if `b_arr` is not. Ignored if `b_arr` is defined.
    font_a : str, optional
        `.ttf` font to use to build text array from `a`
    font_b : str, optional
        `.ttf` font to use to build text array from `b`
    b_arr : ndarray, optional
        Array that the array built from `a` will be compared to. This option
        is included as it is faster to pre-build the array that `a` will be
        compared to, if applying this function in a loop. If both `b` and
        `b_arr` are defined, `b` will be ignored.
    measure : str
        Which measure to maximise (or minimise in the case of `px_dist` and
        `partial_wasserstein`) to find the optimal overlap between arrays.
        Possible options are any metrics calculated by `arr_sim.arr_sim()`.
    translate : bool
        Should the translation operation be optimised via cross-correlation?
        If `False`, will always use default positions.
    scale : bool
        Should scale be optimised?
    rotate : bool
        Should rotation be optimised?
    fliplr : bool
        Should the text built from `a` be flipped horizontally? Note that in
        this version of the function, flipping is not optimised. Instead,
        this function can be run with this set to True and False, and the
        best result taken.
    flipud : bool
        Should the text built from `a` be flipped vertically? Note that in
        this version of the function, flipping is not optimised. Instead,
        this function can be run with this set to True and False, and the
        best result taken.
    size : int
        Size for the text (the scale parameter will be multiplied by this
        value).
    rotation_bounds : tuple
        Limits for optimising rotation in form `(lowerbound, upperbound)`.
        For example, `(-90, 90)` will limit rotation to 90 degrees in either
        direction.
    max_scale_change_factor : float
        Maximum value for the optimised scale parameter.
        `max_scale_change_factor=2` will permit 100% bigger or 50% smaller,
        i.e., twice as large or twice as small.
    rotation_eval_n : int
        How many starting values should be tried for optimising rotation?
    scale_eval_n : int
        How many starting values should be tried for optimising scale?
    solver : str
        Which solver to use? Possible values are those available to
        `scipy.optimize.minimize()`.
    search_method : str
        Method for setting starting values. Options are:
            'grid': set in equal steps from the lower to the upper bound
            'random': set randomly between the lower and upper bound
    plot : bool
        Should the optimal overlap be plotted?
    partial_wasserstein_kwargs : dict
        kwargs to be passed to arr_sim.partial_wasserstein() or
        arr_sim.partial_wasserstein_trans()
    **kwargs
        Other arguments to pass to `text_arr_sim()`.

    Returns
    -------
    dict
        A dictionary containing the following values:
            'a', 'b', 'font_a', 'font_b': The corresponding arguments
            'translate': Whether translation was optimised
            'scale': Whether scale was optimised
            'rotate': Whether rotation was optimised
            'fliplr': Placeholder for main function (always `None`)
            'flipud': Placeholder for main function (always `None`)
            `measure`: Value of the requested measure at the solution
            'translate_val_x': Optimal shift value in x dimension
            'translate_val_y': Optimal shift value in y dimension
            'scale_val': Optimal scale coefficient
            'rotate_val': Optimal rotation coefficient
            'fliplr_val': Whether the array was flipped horizontally
            'flipud_val': Whether the array was flipped vertically

    Raises
    ------
    ValueError
        If scale or rotation is optimised and `search_method` is neither
        `'grid'` nor `'random'`.
    """
    do_minimise = measure in _DISTANCE_MEASURES

    if b_arr is None:
        b_arr = draw.text_array(b, font=font_b, rotate=0, fliplr=False,
                                flipud=False, size=size, **kwargs)

    # arguments shared by every text_arr_sim() call below
    sim_kwargs = dict(b_arr=b_arr, measure=measure, font_a=font_a,
                      translate=translate, fliplr=fliplr, flipud=flipud,
                      size=size,
                      partial_wasserstein_kwargs=partial_wasserstein_kwargs,
                      **kwargs)

    if (not scale) and (not rotate):
        # if neither scale nor rotation need to be optimised, just use the
        # cross correlation approach to get optimal values
        sim_res = text_arr_sim(a, scale_val=1, rotate_val=0, plot=plot,
                               **sim_kwargs)
        best_log_scale = 0
        best_rotation = 0
    else:
        # otherwise, optimise scale and/or rotation
        if search_method not in ('grid', 'random'):
            raise ValueError(
                "search_method must be 'grid' or 'random', got {!r}".format(search_method))

        def cost(scale_val, rotate_val):
            # value handed to the minimiser: the measure itself for distance
            # measures, 1 - measure for similarity measures
            m = text_arr_sim(a, scale_val=scale_val, rotate_val=rotate_val,
                             **sim_kwargs)[measure]
            return m if do_minimise else 1 - m

        # scale is optimised on a log scale: this is centred on zero, has the
        # same precision for increase and decrease (i.e. whether 2x or 0.5x),
        # and prevents a scale of 0
        log_max_scale = np.log(max_scale_change_factor)
        scale_bounds = (-log_max_scale, log_max_scale)

        # starting rotations are restricted to [-180, 180] within the bounds
        rotation_lo = max((-180, min(rotation_bounds)))
        rotation_hi = min((180, max(rotation_bounds)))

        # starting values for optimising scale and rotation
        if search_method == 'grid':
            starting_points_scale = np.linspace(
                scale_bounds[0], scale_bounds[1], scale_eval_n, endpoint=True)
            starting_points_rotation = np.linspace(
                rotation_lo, rotation_hi, rotation_eval_n, endpoint=True)
        else:
            starting_points_scale = np.random.uniform(
                scale_bounds[0], scale_bounds[1], size=scale_eval_n)
            starting_points_rotation = np.random.uniform(
                rotation_lo, rotation_hi, size=rotation_eval_n)

        # choose the objective, starting points and bounds for this problem
        if scale and rotate:
            def objective(x):
                return cost(np.exp(x[0]), x[1])
            starts = [[s, r] for s in starting_points_scale
                      for r in starting_points_rotation]
            bounds = [scale_bounds, rotation_bounds]
        elif scale:
            def objective(x):
                return cost(np.exp(x[0]), 0)
            starts = [[s] for s in starting_points_scale]
            bounds = [scale_bounds]
        else:
            def objective(x):
                return cost(1, x[0])
            starts = [[r] for r in starting_points_rotation]
            bounds = [rotation_bounds]

        iter_res = [minimize(objective, x0=x0, method=solver, bounds=bounds)
                    for x0 in starts]

        # first, get indices of iterations which reached the best solution
        fun_vals = np.array([r['fun'] for r in iter_res])
        is_best = fun_vals == np.min(fun_vals)

        # use this to extract possible scale and rotation solutions
        first_param = np.array([r['x'][0] for r in iter_res])[is_best]
        if scale and rotate:
            log_scales = first_param
            rotations = np.array([r['x'][1] for r in iter_res])[is_best]
        elif scale:
            log_scales = first_param
            rotations = np.zeros(first_param.shape)
        else:
            rotations = first_param
            log_scales = np.zeros(first_param.shape)

        # make sure the rotation values are all expressed within (-180, 180]
        # (this is useful for minimising the angle when there are multiple
        # identical solutions)
        rotations = _wrap_degrees(rotations)

        # next, get the solutions of these with the smallest absolute scale
        # (i.e., closest to original log scale value of zero)
        closest_scale = np.abs(log_scales) == np.min(np.abs(log_scales))
        log_scales = log_scales[closest_scale]
        rotations = rotations[closest_scale]

        # finally, get the solution, of those, with the smallest absolute
        # rotation (i.e., closest to original rotation)
        closest_rotation = np.abs(rotations) == np.min(np.abs(rotations))
        best_log_scale = log_scales[closest_rotation][0]
        best_rotation = rotations[closest_rotation][0]

        # replicate the optimal values to extract the translation values
        sim_res = text_arr_sim(a, scale_val=np.exp(best_log_scale),
                               rotate_val=best_rotation, plot=plot,
                               **sim_kwargs)

    res = {
        'a': a, 'b': b, 'font_a': font_a, 'font_b': font_b,
        # settings for optimisation
        'translate': translate,
        'scale': scale,
        'rotate': rotate,
        'fliplr': None,
        'flipud': None,
        # results from optimisation
        measure: sim_res[measure],
        # note that the shift from text_arr_sim() refers to image indices,
        # rather than array indices
        'translate_val_x': sim_res['shift'][0],
        'translate_val_y': sim_res['shift'][1],
        # the optimal scale and rotation values
        'scale_val': np.exp(best_log_scale),
        'rotate_val': best_rotation,
        'fliplr_val': fliplr,
        'flipud_val': flipud,
    }

    return res


def opt_text_arr_sim(a='a', b=None, font_a='arial.ttf', font_b='arial.ttf',
                     b_arr=None, measure='jaccard', translate=True,
                     scale=True, rotate=True, fliplr=True, flipud=False,
                     size=100, rotation_bounds=(-np.inf, np.inf),
                     max_scale_change_factor=2.0, rotation_eval_n=9,
                     scale_eval_n=9, solver='Nelder-Mead',
                     search_method='grid', plot=False,
                     partial_wasserstein_kwargs={'scale_mass':True, 'mass_normalise':True, 'distance_normalise': True, 'ins_weight':0.0, 'del_weight':0.0},
                     **kwargs):
    """Find parameters for geometric operations of translation, scale, rotation, and flipping that maximise overlap between two arrays of drawn text.

    The optimisation of translation, scale and rotation is run once without
    flipping and once for each permitted flip combination. A flipped solution
    replaces the current best only if it has a better value of `measure`
    (larger for similarity measures, smaller for `px_dist` and
    `partial_wasserstein`) and needs no more absolute rotation.

    Parameters
    ----------
    a : str
    b : str, optional
        Must be defined if `b_arr` is not. Ignored if `b_arr` is defined.
    font_a : str, optional
        `.ttf` font to use to build text array from `a`
    font_b : str, optional
        `.ttf` font to use to build text array from `b`
    b_arr : ndarray, optional
        Array that the array built from `a` will be compared to. This option
        is included as it is faster to pre-build the array that `a` will be
        compared to, if applying this function in a loop. If both `b` and
        `b_arr` are defined, `b` will be ignored.
    measure : str
        Which measure to maximise (or minimise in the case of `px_dist` and
        `partial_wasserstein`) to find the optimal overlap between arrays.
        Possible options are any metrics calculated by `arr_sim.arr_sim()`.
    translate : bool
        Should the translation operation be optimised? If `False`, will
        always use default positions.
    scale : bool
        Should scale be optimised?
    rotate : bool
        Should rotation be optimised?
    fliplr : bool
        Should horizontal flipping (mirroring) be optimised?
    flipud : bool
        Should vertical flipping (mirroring) be optimised?
    size : int
        Size for the text (the scale parameter will be multiplied by this
        value).
    rotation_bounds : tuple
        Limits for optimising rotation in form `(lowerbound, upperbound)`.
        For example, `(-90, 90)` will limit rotation to 90 degrees in either
        direction.
    max_scale_change_factor : float
        Maximum value for the optimised scale parameter.
        `max_scale_change_factor=2` will permit 100% bigger or 50% smaller,
        i.e., twice as large or twice as small.
    rotation_eval_n : int
        How many starting values should be tried for optimising rotation?
    scale_eval_n : int
        How many starting values should be tried for optimising scale?
    solver : str
        Which solver to use? Possible values are those available to
        `scipy.optimize.minimize()`.
    search_method : str
        Method for setting starting values. Options are:
            'grid': set in equal steps from the lower to the upper bound
            'random': set randomly between the lower and upper bound
    plot : bool
        Should the optimal overlap be plotted?
    partial_wasserstein_kwargs : dict
        kwargs to be passed to arr_sim.partial_wasserstein() or
        arr_sim.partial_wasserstein_trans()
    **kwargs
        Other arguments to pass to `text_arr_sim()`.

    Returns
    -------
    dict
        A dictionary containing the following values:
            'a', 'b', 'font_a', 'font_b': The corresponding arguments
            'translate': Whether translation was optimised
            'scale': Whether scale was optimised
            'rotate': Whether rotation was optimised
            'fliplr': Whether horizontal flipping was optimised
            'flipud': Whether vertical flipping was optimised
            `measure`: Value of the requested measure at the solution
            'translate_val_x': Optimal shift value in x dimension
            'translate_val_y': Optimal shift value in y dimension
            'scale_val': Optimal scale coefficient
            'rotate_val': Optimal rotation coefficient
            'fliplr_val': Whether the solution included horizontal flipping
            'flipud_val': Whether the solution included vertical flipping

    Examples
    --------
    >>> opt_text_arr_sim('d', 'p')
    {'a': 'd', 'b': 'p', 'font_a': 'arial.ttf', 'font_b': 'arial.ttf', 'translate': True, 'scale': True, 'rotate': True, 'fliplr': True, 'flipud': False, 'jaccard': 0.9708454810495627, 'translate_val_x': 0, 'translate_val_y': 0, 'scale_val': 1.0, 'rotate_val': 180.0, 'fliplr_val': False, 'flipud_val': False}

    >>> opt_text_arr_sim('d', 'q', flipud=True)
    {'a': 'd', 'b': 'q', 'font_a': 'arial.ttf', 'font_b': 'arial.ttf', 'translate': True, 'scale': True, 'rotate': True, 'fliplr': True, 'flipud': True, 'jaccard': 0.9600580973129993, 'translate_val_x': 0, 'translate_val_y': 0, 'scale_val': 1.0, 'rotate_val': 0.0, 'fliplr_val': False, 'flipud_val': True}

    >>> opt_text_arr_sim('e', 'o', flipud=True, measure='partial_wasserstein')
    """
    # build the comparison array once, rather than once per flip combination
    if b_arr is None:
        b_arr = draw.text_array(b, font=font_b, rotate=0, fliplr=False,
                                flipud=False, size=size, **kwargs)

    lower_is_better = measure in _DISTANCE_MEASURES

    def fit(flip_lr, flip_ud):
        # optimise translation/scale/rotation for one fixed flip setting
        return _opt_text_arr_sim_flip_manual(
            a=a, b=b, font_a=font_a, font_b=font_b, b_arr=b_arr,
            measure=measure, translate=translate, scale=scale, rotate=rotate,
            fliplr=flip_lr, flipud=flip_ud, size=size,
            rotation_bounds=rotation_bounds,
            max_scale_change_factor=max_scale_change_factor,
            rotation_eval_n=rotation_eval_n, scale_eval_n=scale_eval_n,
            solver=solver, search_method=search_method, plot=False,
            partial_wasserstein_kwargs=partial_wasserstein_kwargs, **kwargs)

    # the non-flipped solution is the baseline
    res = fit(False, False)

    # flip combinations to try, in order
    flip_options = []
    if fliplr:
        flip_options.append((True, False))
    if flipud:
        flip_options.append((False, True))
    if fliplr and flipud:
        flip_options.append((True, True))

    for flip_lr, flip_ud in flip_options:
        candidate = fit(flip_lr, flip_ud)
        if lower_is_better:
            is_better = candidate[measure] < res[measure]
        else:
            is_better = candidate[measure] > res[measure]
        # only accept a flipped solution if it also needs no more rotation
        if is_better and np.abs(candidate['rotate_val']) <= np.abs(res['rotate_val']):
            res = candidate

    # record which flips were available to the optimisation
    res['fliplr'] = fliplr
    res['flipud'] = flipud

    if plot:
        # replicate the optimal values to plot
        _ = text_arr_sim(a, b_arr=b_arr, measure=measure, font_a=font_a,
                         translate=translate, fliplr=res['fliplr_val'],
                         flipud=res['flipud_val'], scale_val=res['scale_val'],
                         rotate_val=res['rotate_val'], size=size, plot=plot,
                         partial_wasserstein_kwargs=partial_wasserstein_kwargs,
                         **kwargs)

    return res


def string_px_dist(a, b, **kwargs):
    """Calculate the pixel distance between entire, translation-aligned (via cross-correlation) strings.

    This function is just a wrapper for `draw.text_array()` and
    `arr_sim.px_dist()`.

    Parameters
    ----------
    a : str
    b : str
    **kwargs
        Arguments passed to `draw.text_array()`. Same parameters are used for
        both strings.

    Returns
    -------
    The value returned by `arr_sim.px_dist()` for the two drawn arrays.
    """
    a_arr = draw.text_array(a, **kwargs)
    b_arr = draw.text_array(b, **kwargs)
    return arr_sim.px_dist(a_arr, b_arr)