3 years ago · 5c9aab6b29
--- a/code/grid2csv.py
+++ b/code/grid2csv.py
@@ -25,6 +25,13 @@ speaker_type_dic = {
 
				 #selects TextGrid files only
			
 
				 files = Path(input_dir).glob('*.TextGrid')
			
 
				 
			
 
				+#initialize header
			
 
				+df = pd.DataFrame(columns= ["speaker_id","segment_offset","mwu_type","transcription","speaker_type",
			
 
				+                    "segment_onset","vcm_type","addresseee","raw_filename","ling_type","lex_type"])
			
 
				+
			
 
				+corpus_onset= 0
			
 
				+corpus_offset = 0
			
 
				+
			
 
				 #Loop through each textgrid file of the folder
			
 
				 for file in files:
			
 
				 
			
@@ -36,9 +43,8 @@ for file in files:
 
				     audio_onset = int(re.split("\D+", file.stem)[1])
			
 
				     audio_offset = int(re.split("\D+", file.stem)[2])
			
 
				 
			
 
				-    #initialize header
			
 
				-    df = pd.DataFrame(columns= ["speaker_id","segment_offset","mwu_type","transcription","speaker_type",
			
 
				-                    "segment_onset","vcm_type","addresseee","raw_filename","ling_type","lex_type"])
			
 
				+    if(audio_onset < corpus_onset): corpus_onset = audio_onset
			
 
				+    if(audio_offset > corpus_offset): corpus_offset = audio_offset
			
 
				 
			
 
				     #loop through all tiers
			
 
				     for tier in grid.get_tiers():
			
@@ -47,19 +53,21 @@ for file in files:
 
				             #populates dataframe
			
 
				             df = df.append(pd.Series({'speaker_id': tier.name, 'segment_onset': (interval[0]*1000 + audio_onset),
			
 
				                 'segment_offset': (interval[1]*1000 + audio_onset), 'transcription': interval[2]}), ignore_index= True)
			
 
				+                
			
 
				+                
			
 
				+#populates speaker_type column
			
 
				+df['speaker_type'] = df['speaker_id'].map(speaker_type_dic)
			
 
				     
			
 
				-    #populates speaker_type column
			
 
				-    df['speaker_type'] = df['speaker_id'].map(speaker_type_dic)
			
 
				+#enriches the csv with previously dropped entries corresponding to the right timestamps
			
 
				+orig_df_subset = original_df[(original_df['segment_onset'] >= corpus_onset) &
			
 
				+                     (original_df['segment_offset'] <= corpus_offset)]
			
 
				 
			
 
				-    #enriches the csv with previously dropped entries corresponsing to the right timestamps
			
 
				-    orig_df_subset = original_df[(original_df['segment_onset'] >= audio_onset) &
			
 
				-                         (original_df['segment_offset'] <= audio_offset)]
			
 
				 
			
 
				-    df = df.append(orig_df_subset)
			
 
				+df = df.append(orig_df_subset)
			
 
				 
			
 
				-    #sort values by segment_onset before exporting
			
 
				-    df.sort_values(by='segment_onset', inplace= True)
			
 
				+#sort values by segment_onset before exporting
			
 
				+df.sort_values(by='segment_onset', inplace= True)
			
 
				 
			
 
				-    #exports to csv
			
 
				-    df.to_csv("{0}/{1}.csv".format(output_path, file.stem), mode = "x", na_rep= "NA", index= False)
			
 
				-    print("----------------SAVED!-----------------")
			
 
				+#exports to csv
			
 
				+df.to_csv("{0}/BN.csv".format(output_path), mode = "x", na_rep= "NA", index= False)
			
 
				+print("----------------SAVED!-----------------")
			
--- a/outputs/grid2csv_output/BN-0-727249.csv
+++ b/outputs/grid2csv_output/BN-0-727249.csv
@@ -1 +0,0 @@
 
				-../../.git/annex/objects/k1/85/MD5E-s104298--03972dfb2675594715bd6929eaf313e3.csv/MD5E-s104298--03972dfb2675594715bd6929eaf313e3.csv
			
--- a/outputs/grid2csv_output/BN-1297003-1913555.csv
+++ b/outputs/grid2csv_output/BN-1297003-1913555.csv
@@ -1 +0,0 @@
 
				-../../.git/annex/objects/G6/FV/MD5E-s122253--5819007f4d16526ec30ed07891ede1cf.csv/MD5E-s122253--5819007f4d16526ec30ed07891ede1cf.csv
			
--- a/outputs/grid2csv_output/BN-727249-1297003.csv
+++ b/outputs/grid2csv_output/BN-727249-1297003.csv
@@ -1 +0,0 @@
 
				-../../.git/annex/objects/0g/2V/MD5E-s105529--f99080b1a555ede680f060c467ba9dfa.csv/MD5E-s105529--f99080b1a555ede680f060c467ba9dfa.csv
			
--- a/outputs/grid2csv_output/BN.csv
+++ b/outputs/grid2csv_output/BN.csv
@@ -0,0 +1 @@
 
				+../../.git/annex/objects/fj/jq/MD5E-s331822--b7aa0a9ace55505b526d7ba4b370f4f9.csv/MD5E-s331822--b7aa0a9ace55505b526d7ba4b370f4f9.csv
		`@@ -1 +0,0 @@`
		`-../../.git/annex/objects/k1/85/MD5E-s104298--03972dfb2675594715bd6929eaf313e3.csv/MD5E-s104298--03972dfb2675594715bd6929eaf313e3.csv`
		`@@ -1 +0,0 @@`
		`-../../.git/annex/objects/G6/FV/MD5E-s122253--5819007f4d16526ec30ed07891ede1cf.csv/MD5E-s122253--5819007f4d16526ec30ed07891ede1cf.csv`
		`@@ -1 +0,0 @@`
		`-../../.git/annex/objects/0g/2V/MD5E-s105529--f99080b1a555ede680f060c467ba9dfa.csv/MD5E-s105529--f99080b1a555ede680f060c467ba9dfa.csv`
		`@@ -0,0 +1 @@`
		`+../../.git/annex/objects/fj/jq/MD5E-s331822--b7aa0a9ace55505b526d7ba4b370f4f9.csv/MD5E-s331822--b7aa0a9ace55505b526d7ba4b370f4f9.csv`