123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198 |
- # %%
- # imports
- import os.path as op
- import pandas as pd
- import numpy as np
- from string import ascii_lowercase
# lowercase ASCII letters a-z, followed by the German letters ä, ö, ü and ß
chars = list(ascii_lowercase) + ['ä', 'ö', 'ü', 'ß']
- import matplotlib.pyplot as plt
- from mpl_toolkits.axes_grid1 import make_axes_locatable
# figure typography: font sizes used by the plotting code, plus the global
# matplotlib defaults (no LaTeX rendering, Helvetica, base size as above)
fig_font_size = 8
fig_font_size_small = 8
plt.rcParams['text.usetex'] = False
plt.rcParams['font.family'] = 'Helvetica'
plt.rcParams['font.size'] = fig_font_size
def rankify_vec(vec):
    """Rank the values of a 1-D vector, averaging the ranks of ties.

    Ranks start at 1 for the smallest value.  Equal values all receive the
    mean of the ordinal ranks they jointly occupy (the "average" tie rule,
    as in R's ``rank``).

    Parameters
    ----------
    vec : array_like, 1-D
        Values to rank.

    Returns
    -------
    numpy.ndarray
        Float array of the same length as ``vec`` holding the ranks.

    Raises
    ------
    ValueError
        If ``vec`` is not one-dimensional.

    Examples
    --------
    >>> rankify_vec([10, 20, 10, 30]).tolist()
    [1.5, 3.0, 1.5, 4.0]
    """
    vec = np.asarray(vec)
    if vec.ndim != 1:
        raise ValueError('rankify_vec expects a 1-D vector, got %d-D input' % vec.ndim)

    order = vec.argsort()
    sorted_vals = vec[order]

    # Flag the first element of every run of equal values in sorted order.
    # NaN != NaN, so each NaN forms its own run and keeps its ordinal rank.
    is_run_start = np.ones(vec.size, dtype=bool)
    is_run_start[1:] = sorted_vals[1:] != sorted_vals[:-1]

    run_id = np.cumsum(is_run_start) - 1
    run_first = np.flatnonzero(is_run_start)      # 0-based index of first member
    run_stop = np.append(run_first[1:], vec.size)  # 0-based index past last member

    # A run occupies ordinal ranks run_first+1 .. run_stop; their mean is the
    # midpoint of that range, which is exact for integers and half-integers.
    run_mean_rank = (run_first + 1 + run_stop) / 2.0

    rank_vec = np.empty(vec.size, dtype=float)
    rank_vec[order] = run_mean_rank[run_id]  # scatter back to input order
    return rank_vec
def rankify_sq_mat(mat):
    """Rank-transform a square matrix through its strict lower triangle.

    Only the entries below the main diagonal of ``mat`` are read.  They are
    ranked together with ``rankify_vec`` and written back into a new matrix,
    which is then mirrored across the diagonal so the result is symmetric.
    The diagonal of the result is set to NaN.

    Parameters
    ----------
    mat : numpy.ndarray
        Square 2-D array.

    Returns
    -------
    numpy.ndarray
        Float array shaped like ``mat`` with ranks off the diagonal and NaN
        on it.
    """
    n = mat.shape[0]
    below_diag = np.tril_indices(n=n, k=-1)
    diag_and_above = np.triu_indices(n=n, k=0)

    mat_rank = np.zeros(mat.shape)
    mat_rank[below_diag] = rankify_vec(mat[below_diag])
    # copy the lower triangle into the upper one by reading the transpose
    mat_rank[diag_and_above] = mat_rank.T[diag_and_above]
    mat_rank[np.diag_indices(n=n)] = np.nan
    return mat_rank
- #%matplotlib qt
# %%
# import the features
# one CSV per control variable, read in the order listed below
sz_feat_df, f_feat_df, ph_feat_df, phn_feat_df = (
    pd.read_csv(op.join('..', 'stim_sim', subdir, fname))
    for subdir, fname in (
        ('complexity', 'complexity_features.csv'),
        ('frequency', 'frequency_features.csv'),
        ('phonology', 'dominant_phonemes_features.csv'),
        ('phonology', 'letter_names_features.csv'),
    )
)
# %%
# import the RDMs
# each control variable has a CSV table and a .npy matrix; for every variable
# the CSV is read first, then the matrix
(sz_df, sz_mat), (f_df, f_mat), (ph_df, ph_mat), (phn_df, phn_mat) = (
    (
        pd.read_csv(op.join('..', 'stim_sim', subdir, csv_name)),
        np.load(op.join('..', 'stim_sim', subdir, npy_name)),
    )
    for subdir, csv_name, npy_name in (
        ('complexity', 'complexity.csv', 'complexity.npy'),
        ('frequency', 'frequency.csv', 'frequency.npy'),
        ('phonology', 'dominant_phonemes.csv', 'dominant_phonemes.npy'),
        ('phonology', 'letter_names.csv', 'letter_names.npy'),
    )
)
# %%
# build the plot
# 4 x 4 grid of axes: one row per control variable; the columns are
# row label | features | raw RDM | rank RDM
fig, axs = plt.subplots(4, 4, figsize=(6, 4.8))
fig.subplots_adjust(wspace=0.75, hspace=0.55)

# titles
# row labels are drawn as centred text inside the first-column axes (rather
# than as y-labels), and those axes are then switched off entirely
titles = [
    'Letter Size\n(Pixel Sum)',
    'Letter\nFrequency\n(SUBTLEX-DE\nCount)',
    'Dominant\nPhonemes\n(PanPhon\nFeatures)',
    'Letter Name\nPhonemes\n(PanPhon\nFeatures)',
]
for label_ax, label in zip(axs[:, 0], titles):
    label_ax.text(
        0.75, 0.5, label,
        transform=label_ax.transAxes,
        size=fig_font_size,
        va='center',
        ha='center',
    )
    label_ax.axis('off')

# column headers sit on the top row only
for header_ax, header in zip(axs[0, 1:], ('Features', 'Raw RDMs', 'Rank RDMs')):
    header_ax.set_title(header, fontsize=fig_font_size)
# size
# row 0: bar chart of pixel_sum per character, then the raw and the
# rank-transformed RDM
feat_ax, raw_ax, rank_ax = axs[0, 1:]
feat_ax.bar(sz_feat_df['char'], sz_feat_df['pixel_sum'], width=1.0, color='black')
feat_ax.set_yticks([0, 2500, 5000])
sz_rdm_im = raw_ax.imshow(sz_mat, interpolation='none')
sz_rank_im = rank_ax.imshow(rankify_sq_mat(sz_mat), interpolation='none')
# attach a slim vertical colorbar to the right of each image
# ([1, 435]: presumably the rank range for 30 characters, 30*29/2 pairs)
for im, ticks in ((sz_rdm_im, [0, 2000, 4000]), (sz_rank_im, [1, 435])):
    cax = make_axes_locatable(im.axes).append_axes('right', size='5%', pad=0.05)
    fig.colorbar(im, cax=cax, orientation='vertical', ticks=ticks)
# frequency
# row 1: bar chart of freq per character, then the raw and the
# rank-transformed RDM
feat_ax, raw_ax, rank_ax = axs[1, 1:]
feat_ax.bar(f_feat_df['char'], f_feat_df['freq'], width=1.0, color='black')
feat_ax.set_yticks(np.array([0, 0.5, 1, 1.5])*1e7)
f_rdm_im = raw_ax.imshow(f_mat, interpolation='none')
f_rank_im = rank_ax.imshow(rankify_sq_mat(f_mat), interpolation='none')
# attach a slim vertical colorbar to the right of each image
for im, ticks in (
    (f_rdm_im, np.array([0, 0.4, 0.8, 1.2])*1e7),
    (f_rank_im, [1, 435]),
):
    cax = make_axes_locatable(im.axes).append_axes('right', size='5%', pad=0.05)
    fig.colorbar(im, cax=cax, orientation='vertical', ticks=ticks)
# dominant phonemes
# row 2: feature matrix (every column except 'char', transposed so that
# features run along y), then the raw and the rank-transformed RDM
feat_ax, raw_ax, rank_ax = axs[2, 1:]
ph_feat_mat = ph_feat_df.loc[:, ph_feat_df.columns != 'char'].to_numpy().T
ph_im = feat_ax.imshow(
    ph_feat_mat, interpolation='none', aspect='auto',
    cmap='coolwarm', vmin=-1, vmax=1,
)
# tick positions are 0-based rows; the labels show them 1-based
feat_ax.set_yticks([0, 5, 11, 17, 23])
feat_ax.set_yticklabels([1, 6, 12, 18, 24])
ph_rdm_im = raw_ax.imshow(ph_mat, interpolation='none')
# NOTE(review): this is the only rank panel that pins vmin/vmax; the other
# rank panels autoscale - confirm the asymmetry is intended
ph_rank_im = rank_ax.imshow(
    rankify_sq_mat(ph_mat), interpolation='none', vmin=1, vmax=435,
)
# attach a slim vertical colorbar to the right of each image; the feature
# panel keeps matplotlib's default colorbar ticks
for im, extra in (
    (ph_im, {}),
    (ph_rdm_im, {'ticks': [0, 0.4, 0.8, 1.2]}),
    (ph_rank_im, {'ticks': [1, 435]}),
):
    cax = make_axes_locatable(im.axes).append_axes('right', size='5%', pad=0.05)
    fig.colorbar(im, cax=cax, orientation='vertical', **extra)
# letter names
# row 3: feature matrix (every column except 'char', transposed so that
# features run along y), then the raw and the rank-transformed RDM
feat_ax, raw_ax, rank_ax = axs[3, 1:]
phn_feat_mat = phn_feat_df.loc[:, phn_feat_df.columns != 'char'].to_numpy().T
phn_im = feat_ax.imshow(
    phn_feat_mat, interpolation='none', aspect='auto',
    cmap='coolwarm', vmin=-1, vmax=1,
)
# tick positions are 0-based rows; the labels show them 1-based
feat_ax.set_yticks([0, 5, 11, 17, 23])
feat_ax.set_yticklabels([1, 6, 12, 18, 24])
phn_rdm_im = raw_ax.imshow(phn_mat, interpolation='none')
phn_rank_im = rank_ax.imshow(rankify_sq_mat(phn_mat), interpolation='none')
# attach a slim vertical colorbar to the right of each image; the feature
# panel keeps matplotlib's default colorbar ticks
for im, extra in (
    (phn_im, {}),
    (phn_rdm_im, {'ticks': [0, 0.5, 1]}),
    (phn_rank_im, {'ticks': [1, 435]}),
):
    cax = make_axes_locatable(im.axes).append_axes('right', size='5%', pad=0.05)
    fig.colorbar(im, cax=cax, orientation='vertical', **extra)
# axes labels for characters
# Every character axis shows only its first and last character; the middle
# tick carries a '...' placeholder whose tick mark is hidden.
char_tick_positions = [0, 14, 29]
char_tick_labels = ['a', '...', 'ß']

# x axis: feature, raw-RDM and rank-RDM panels (columns 1-3)
for ax in axs[:, 1:].flat:
    ax.set_xticks(char_tick_positions)
    ax.set_xticklabels(char_tick_labels)
    # Address the tick mark by name instead of by its position in
    # Tick.get_children(), whose ordering is an implementation detail.
    ax.xaxis.get_major_ticks()[1].tick1line.set_visible(False)

# y axis: only the RDM panels (columns 2-3) have characters along y
for ax in axs[:, 2:].flat:
    ax.set_yticks(char_tick_positions)
    ax.set_yticklabels(char_tick_labels)
    middle_tick = ax.yaxis.get_major_ticks()[1]
    middle_tick.tick1line.set_visible(False)
    # turn the '...' placeholder so it reads along the vertical axis
    middle_tick.label1.set_rotation(90)
# write the figure to ../fig, first as PDF and then as PNG
for out_name in ('illustrate_controls.pdf', 'illustrate_controls.png'):
    fig.savefig(op.join('..', 'fig', out_name))
|