Browse Source

re-downloaded childes data

yaya-sy 1 year ago
parent
commit
6e9ca5f21b

BIN
code/__pycache__/utterances_cleaner.cpython-310.pyc


+ 3 - 2
code/download_childes_corpora.py

@@ -54,7 +54,7 @@ class DownloadChildCorpora :
                             chat,
                             participants_to_consider: List[str],
                             phonemize_child: bool,
-                            ) -> Generator[str, str, float, List[str]]:
+                            ) -> Generator:
         """
         Get the data for each participant. Here, the data for each participant\
         is the set of utterances produced by this participant at all child ages.
@@ -201,7 +201,8 @@ class DownloadChildCorpora :
                                                                     participant,
                                                                     backend,
                                                                     phonemize_child=phonemize_child) :
-                        utterance = self.utterances_cleaner.clean(utterance)
+                        # utterance = " ".join(phon for word in utterance.split("@") for phon in word.split("$") if phon)
+                        # utterance = self.utterances_cleaner.clean(utterance)
                         utterance = self.utterances_cleaner.remove_multiple_spaces(utterance)
                         if not utterance :
                             continue

+ 1 - 0
commands_reproduction.txt

@@ -0,0 +1 @@
+.git/annex/objects/8v/Zm/MD5E-s459--11999fdb245d2931764986dd3e7ee155.txt/MD5E-s459--11999fdb245d2931764986dd3e7ee155.txt

+ 1 - 1
datasets/childes_json_corpora/da.json

@@ -1 +1 @@
-../../.git/annex/objects/vM/x5/MD5E-s2337240--e290f2a0b859321b976231b16975874b.json/MD5E-s2337240--e290f2a0b859321b976231b16975874b.json
+../../.git/annex/objects/wM/J4/MD5E-s2940316--a8934de46f3d6e2096f15f7f096e92b0.json/MD5E-s2940316--a8934de46f3d6e2096f15f7f096e92b0.json

+ 1 - 1
datasets/childes_json_corpora/de.json

@@ -1 +1 @@
-../../.git/annex/objects/vp/q4/MD5E-s35877439--f12ba95f73dedd9db8730afa68f41b1a.json/MD5E-s35877439--f12ba95f73dedd9db8730afa68f41b1a.json
+../../.git/annex/objects/gx/kK/MD5E-s46328318--41a6db192dc968c831890df8073f4797.json/MD5E-s46328318--41a6db192dc968c831890df8073f4797.json

+ 1 - 1
datasets/childes_json_corpora/en.json

@@ -1 +1 @@
-../../.git/annex/objects/vq/40/MD5E-s26156558--dde71475bb19d3229cb12096ad6cf2ce.json/MD5E-s26156558--dde71475bb19d3229cb12096ad6cf2ce.json
+../../.git/annex/objects/7V/1j/MD5E-s32520507--e5ba1e7c1bfa0ed106862bef65784026.json/MD5E-s32520507--e5ba1e7c1bfa0ed106862bef65784026.json

+ 1 - 1
datasets/childes_json_corpora/es.json

@@ -1 +1 @@
-../../.git/annex/objects/6X/F5/MD5E-s4840652--24dd0c5dce5dd473fb4b9096f868c61c.json/MD5E-s4840652--24dd0c5dce5dd473fb4b9096f868c61c.json
+../../.git/annex/objects/pv/Pf/MD5E-s7544886--e1f000f799dad171c34f6cc47dd0656e.json/MD5E-s7544886--e1f000f799dad171c34f6cc47dd0656e.json

+ 1 - 1
datasets/childes_json_corpora/et.json

@@ -1 +1 @@
-../../.git/annex/objects/G0/q6/MD5E-s5733971--267459710a0f750e31d253cb273c9d35.json/MD5E-s5733971--267459710a0f750e31d253cb273c9d35.json
+../../.git/annex/objects/k8/vv/MD5E-s8521517--d3661c988b3c4298fca6c6bf53a1e50c.json/MD5E-s8521517--d3661c988b3c4298fca6c6bf53a1e50c.json

+ 1 - 1
datasets/childes_json_corpora/eu.json

@@ -1 +1 @@
-../../.git/annex/objects/98/Mq/MD5E-s1053505--bbb9bf5ef9161af40c2ae211e6ce8659.json/MD5E-s1053505--bbb9bf5ef9161af40c2ae211e6ce8659.json
+../../.git/annex/objects/43/3v/MD5E-s1472846--a18f0bbd772ee30982d7a6698dce2107.json/MD5E-s1472846--a18f0bbd772ee30982d7a6698dce2107.json

+ 1 - 1
datasets/childes_json_corpora/fr.json

@@ -1 +1 @@
-../../.git/annex/objects/Xg/7k/MD5E-s2101022--18ab21078fc545a74113d5ba36e3c9c1.json/MD5E-s2101022--18ab21078fc545a74113d5ba36e3c9c1.json
+../../.git/annex/objects/jq/Vx/MD5E-s3234823--ca293387898ab63ac7c7794b873f9e0c.json/MD5E-s3234823--ca293387898ab63ac7c7794b873f9e0c.json

+ 1 - 1
datasets/childes_json_corpora/ja.json

@@ -1 +1 @@
-../../.git/annex/objects/j9/wz/MD5E-s29045222--877c73d709b18c9e0347d58938083516.json/MD5E-s29045222--877c73d709b18c9e0347d58938083516.json
+../../.git/annex/objects/xx/VW/MD5E-s471--6d9bc19b94aff794c0a23151db4eb4fd.json/MD5E-s471--6d9bc19b94aff794c0a23151db4eb4fd.json

+ 1 - 1
datasets/childes_json_corpora/pl.json

@@ -1 +1 @@
-../../.git/annex/objects/gw/vK/MD5E-s13059577--3aa4cabf95e8b8656fa85c6561d4fc07.json/MD5E-s13059577--3aa4cabf95e8b8656fa85c6561d4fc07.json
+../../.git/annex/objects/v0/qZ/MD5E-s17536902--3ae284fc8e315cdb5ff45ea0f68f5732.json/MD5E-s17536902--3ae284fc8e315cdb5ff45ea0f68f5732.json

+ 1 - 1
datasets/childes_json_corpora/pt.json

@@ -1 +1 @@
-../../.git/annex/objects/V0/zV/MD5E-s4492406--e9e253c653ac86c0aa28a61c0e2778f2.json/MD5E-s4492406--e9e253c653ac86c0aa28a61c0e2778f2.json
+../../.git/annex/objects/6k/0G/MD5E-s6191044--0998067d5cdac6b67d219afaa092d48c.json/MD5E-s6191044--0998067d5cdac6b67d219afaa092d48c.json

+ 1 - 1
datasets/childes_json_corpora/sr.json

@@ -1 +1 @@
-../../.git/annex/objects/pk/z3/MD5E-s5896447--11bcd255365b02df7aa8771586f73e94.json/MD5E-s5896447--11bcd255365b02df7aa8771586f73e94.json
+../../.git/annex/objects/7J/1p/MD5E-s8135600--6a80c3fbc662975ad3c67a668ce7e062.json/MD5E-s8135600--6a80c3fbc662975ad3c67a668ce7e062.json

+ 1 - 1
datasets/childes_json_corpora/tr.json

@@ -1 +1 @@
-../../.git/annex/objects/37/02/MD5E-s679401--d1a3fc03edc30eff14aa62d9b70d9e0f.json/MD5E-s679401--d1a3fc03edc30eff14aa62d9b70d9e0f.json
+../../.git/annex/objects/6x/Qv/MD5E-s915346--3d36f2449cdcbd36acf882542bcefa0c.json/MD5E-s915346--3d36f2449cdcbd36acf882542bcefa0c.json