Browse Source

premieres configurations

yaya-sy 1 year ago
parent
commit
d0fa2c1003
51 changed files with 266 additions and 0 deletions
  1. 2 0
      .gitattributes
  2. 1 0
      datasets/childes_json_corpora/da.json
  3. 1 0
      datasets/childes_json_corpora/de.json
  4. 1 0
      datasets/childes_json_corpora/en.json
  5. 1 0
      datasets/childes_json_corpora/es.json
  6. 1 0
      datasets/childes_json_corpora/et.json
  7. 1 0
      datasets/childes_json_corpora/eu.json
  8. 1 0
      datasets/childes_json_corpora/fr.json
  9. 1 0
      datasets/childes_json_corpora/ja.json
  10. 1 0
      datasets/childes_json_corpora/pl.json
  11. 1 0
      datasets/childes_json_corpora/pt.json
  12. 1 0
      datasets/childes_json_corpora/sr.json
  13. 1 0
      datasets/childes_json_corpora/tr.json
  14. 1 0
      datasets/train_dev_opensubtitles/dev/da.one_sentence_per_line
  15. 1 0
      datasets/train_dev_opensubtitles/dev/de.one_sentence_per_line
  16. 1 0
      datasets/train_dev_opensubtitles/dev/en.one_sentence_per_line
  17. 1 0
      datasets/train_dev_opensubtitles/dev/es.one_sentence_per_line
  18. 1 0
      datasets/train_dev_opensubtitles/dev/et.one_sentence_per_line
  19. 1 0
      datasets/train_dev_opensubtitles/dev/eu.one_sentence_per_line
  20. 1 0
      datasets/train_dev_opensubtitles/dev/fr.one_sentence_per_line
  21. 1 0
      datasets/train_dev_opensubtitles/dev/ja.one_sentence_per_line
  22. 1 0
      datasets/train_dev_opensubtitles/dev/pl.one_sentence_per_line
  23. 1 0
      datasets/train_dev_opensubtitles/dev/pt.one_sentence_per_line
  24. 1 0
      datasets/train_dev_opensubtitles/dev/sr.one_sentence_per_line
  25. 1 0
      datasets/train_dev_opensubtitles/dev/tr.one_sentence_per_line
  26. 1 0
      datasets/train_dev_opensubtitles/train/da.one_sentence_per_line
  27. 1 0
      datasets/train_dev_opensubtitles/train/de.one_sentence_per_line
  28. 1 0
      datasets/train_dev_opensubtitles/train/en.one_sentence_per_line
  29. 1 0
      datasets/train_dev_opensubtitles/train/es.one_sentence_per_line
  30. 1 0
      datasets/train_dev_opensubtitles/train/et.one_sentence_per_line
  31. 1 0
      datasets/train_dev_opensubtitles/train/eu.one_sentence_per_line
  32. 1 0
      datasets/train_dev_opensubtitles/train/fr.one_sentence_per_line
  33. 1 0
      datasets/train_dev_opensubtitles/train/ja.one_sentence_per_line
  34. 1 0
      datasets/train_dev_opensubtitles/train/pl.one_sentence_per_line
  35. 1 0
      datasets/train_dev_opensubtitles/train/pt.one_sentence_per_line
  36. 1 0
      datasets/train_dev_opensubtitles/train/sr.one_sentence_per_line
  37. 1 0
      datasets/train_dev_opensubtitles/train/tr.one_sentence_per_line
  38. 1 0
      datasets/train_dev_opensubtitles/words/da.one_sentence_per_line
  39. 1 0
      datasets/train_dev_opensubtitles/words/de.one_sentence_per_line
  40. 1 0
      datasets/train_dev_opensubtitles/words/en.one_sentence_per_line
  41. 1 0
      datasets/train_dev_opensubtitles/words/es.one_sentence_per_line
  42. 1 0
      datasets/train_dev_opensubtitles/words/et.one_sentence_per_line
  43. 1 0
      datasets/train_dev_opensubtitles/words/eu.one_sentence_per_line
  44. 1 0
      datasets/train_dev_opensubtitles/words/fr.one_sentence_per_line
  45. 1 0
      datasets/train_dev_opensubtitles/words/ja.one_sentence_per_line
  46. 1 0
      datasets/train_dev_opensubtitles/words/pl.one_sentence_per_line
  47. 1 0
      datasets/train_dev_opensubtitles/words/pt.one_sentence_per_line
  48. 1 0
      datasets/train_dev_opensubtitles/words/sr.one_sentence_per_line
  49. 1 0
      datasets/train_dev_opensubtitles/words/tr.one_sentence_per_line
  50. 157 0
      extra/languages_to_download_informations.yaml
  51. 59 0
      extra/markers.json

+ 2 - 0
.gitattributes

@@ -2,3 +2,5 @@
 **/.git* annex.largefiles=nothing
 CHANGELOG.md annex.largefiles=nothing
 README.md annex.largefiles=nothing
+code/* annex.largefiles=nothing
+extra/* annex.largefiles=nothing

+ 1 - 0
datasets/childes_json_corpora/da.json

@@ -0,0 +1 @@
+../../.git/annex/objects/vM/x5/MD5E-s2337240--e290f2a0b859321b976231b16975874b.json/MD5E-s2337240--e290f2a0b859321b976231b16975874b.json

+ 1 - 0
datasets/childes_json_corpora/de.json

@@ -0,0 +1 @@
+../../.git/annex/objects/vp/q4/MD5E-s35877439--f12ba95f73dedd9db8730afa68f41b1a.json/MD5E-s35877439--f12ba95f73dedd9db8730afa68f41b1a.json

+ 1 - 0
datasets/childes_json_corpora/en.json

@@ -0,0 +1 @@
+../../.git/annex/objects/vq/40/MD5E-s26156558--dde71475bb19d3229cb12096ad6cf2ce.json/MD5E-s26156558--dde71475bb19d3229cb12096ad6cf2ce.json

+ 1 - 0
datasets/childes_json_corpora/es.json

@@ -0,0 +1 @@
+../../.git/annex/objects/6X/F5/MD5E-s4840652--24dd0c5dce5dd473fb4b9096f868c61c.json/MD5E-s4840652--24dd0c5dce5dd473fb4b9096f868c61c.json

+ 1 - 0
datasets/childes_json_corpora/et.json

@@ -0,0 +1 @@
+../../.git/annex/objects/G0/q6/MD5E-s5733971--267459710a0f750e31d253cb273c9d35.json/MD5E-s5733971--267459710a0f750e31d253cb273c9d35.json

+ 1 - 0
datasets/childes_json_corpora/eu.json

@@ -0,0 +1 @@
+../../.git/annex/objects/98/Mq/MD5E-s1053505--bbb9bf5ef9161af40c2ae211e6ce8659.json/MD5E-s1053505--bbb9bf5ef9161af40c2ae211e6ce8659.json

+ 1 - 0
datasets/childes_json_corpora/fr.json

@@ -0,0 +1 @@
+../../.git/annex/objects/Xg/7k/MD5E-s2101022--18ab21078fc545a74113d5ba36e3c9c1.json/MD5E-s2101022--18ab21078fc545a74113d5ba36e3c9c1.json

+ 1 - 0
datasets/childes_json_corpora/ja.json

@@ -0,0 +1 @@
+../../.git/annex/objects/j9/wz/MD5E-s29045222--877c73d709b18c9e0347d58938083516.json/MD5E-s29045222--877c73d709b18c9e0347d58938083516.json

+ 1 - 0
datasets/childes_json_corpora/pl.json

@@ -0,0 +1 @@
+../../.git/annex/objects/gw/vK/MD5E-s13059577--3aa4cabf95e8b8656fa85c6561d4fc07.json/MD5E-s13059577--3aa4cabf95e8b8656fa85c6561d4fc07.json

+ 1 - 0
datasets/childes_json_corpora/pt.json

@@ -0,0 +1 @@
+../../.git/annex/objects/V0/zV/MD5E-s4492406--e9e253c653ac86c0aa28a61c0e2778f2.json/MD5E-s4492406--e9e253c653ac86c0aa28a61c0e2778f2.json

+ 1 - 0
datasets/childes_json_corpora/sr.json

@@ -0,0 +1 @@
+../../.git/annex/objects/pk/z3/MD5E-s5896447--11bcd255365b02df7aa8771586f73e94.json/MD5E-s5896447--11bcd255365b02df7aa8771586f73e94.json

+ 1 - 0
datasets/childes_json_corpora/tr.json

@@ -0,0 +1 @@
+../../.git/annex/objects/37/02/MD5E-s679401--d1a3fc03edc30eff14aa62d9b70d9e0f.json/MD5E-s679401--d1a3fc03edc30eff14aa62d9b70d9e0f.json

+ 1 - 0
datasets/train_dev_opensubtitles/dev/da.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/7X/wF/MD5E-s628135--09500a5c30f324db1ac655fdcd698057/MD5E-s628135--09500a5c30f324db1ac655fdcd698057

+ 1 - 0
datasets/train_dev_opensubtitles/dev/de.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/WG/KJ/MD5E-s688449--67c5d38a3483c32089345dbd1f9b8841/MD5E-s688449--67c5d38a3483c32089345dbd1f9b8841

+ 1 - 0
datasets/train_dev_opensubtitles/dev/en.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/7W/1W/MD5E-s625461--d999aa024465427f7747514778b9cd31/MD5E-s625461--d999aa024465427f7747514778b9cd31

+ 1 - 0
datasets/train_dev_opensubtitles/dev/es.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/fQ/gq/MD5E-s611453--160fbd2fd42e4676a5a0dd8284529f7b/MD5E-s611453--160fbd2fd42e4676a5a0dd8284529f7b

+ 1 - 0
datasets/train_dev_opensubtitles/dev/et.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/KP/JZ/MD5E-s606641--dbf47a6784f1a02aeb641901981ca620/MD5E-s606641--dbf47a6784f1a02aeb641901981ca620

+ 1 - 0
datasets/train_dev_opensubtitles/dev/eu.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/Zk/X9/MD5E-s646723--030d017208f278d06ef8073090cfe20c/MD5E-s646723--030d017208f278d06ef8073090cfe20c

+ 1 - 0
datasets/train_dev_opensubtitles/dev/fr.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/FZ/g8/MD5E-s513871--3ef2f0efe98abffab417682447b3f226/MD5E-s513871--3ef2f0efe98abffab417682447b3f226

+ 1 - 0
datasets/train_dev_opensubtitles/dev/ja.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/19/4m/MD5E-s1611113--135e9eda094b10ab4194865f9965f886/MD5E-s1611113--135e9eda094b10ab4194865f9965f886

+ 1 - 0
datasets/train_dev_opensubtitles/dev/pl.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/V3/5f/MD5E-s662618--9b7c74396bd7086fe5ff2a0efb7b7f9f/MD5E-s662618--9b7c74396bd7086fe5ff2a0efb7b7f9f

+ 1 - 0
datasets/train_dev_opensubtitles/dev/pt.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/xf/Q6/MD5E-s704164--907624e97f8f81335629f133d6fed6e8/MD5E-s704164--907624e97f8f81335629f133d6fed6e8

+ 1 - 0
datasets/train_dev_opensubtitles/dev/sr.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/0z/91/MD5E-s599488--3f73eb78ff33b2313351ba75b40e76f8/MD5E-s599488--3f73eb78ff33b2313351ba75b40e76f8

+ 1 - 0
datasets/train_dev_opensubtitles/dev/tr.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/8x/3p/MD5E-s677913--f5b8f8f467ffd6a9a6b518832abc35ca/MD5E-s677913--f5b8f8f467ffd6a9a6b518832abc35ca

+ 1 - 0
datasets/train_dev_opensubtitles/train/da.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/xG/Qq/MD5E-s31099426--217a8f30125f412b2b62b886b04805d3/MD5E-s31099426--217a8f30125f412b2b62b886b04805d3

+ 1 - 0
datasets/train_dev_opensubtitles/train/de.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/6m/GZ/MD5E-s34414006--99464e797d913dd95e26aaaa56953603/MD5E-s34414006--99464e797d913dd95e26aaaa56953603

+ 1 - 0
datasets/train_dev_opensubtitles/train/en.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/WV/J0/MD5E-s31351708--b737f19c2dababb98835ba4b4f0e523a/MD5E-s31351708--b737f19c2dababb98835ba4b4f0e523a

+ 1 - 0
datasets/train_dev_opensubtitles/train/es.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/g5/vK/MD5E-s30317866--f8606c40818dae33f2afcb0411d30649/MD5E-s30317866--f8606c40818dae33f2afcb0411d30649

+ 1 - 0
datasets/train_dev_opensubtitles/train/et.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/w6/J4/MD5E-s30302587--2df9e11b5757fb9e929a0e7aed5a246a/MD5E-s30302587--2df9e11b5757fb9e929a0e7aed5a246a

+ 1 - 0
datasets/train_dev_opensubtitles/train/eu.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/Kq/7m/MD5E-s32429199--d34a59b568354a7a32d9290f3c53fefa/MD5E-s32429199--d34a59b568354a7a32d9290f3c53fefa

+ 1 - 0
datasets/train_dev_opensubtitles/train/fr.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/0W/26/MD5E-s25810117--38651bddeab03ca251207953a6d1f054/MD5E-s25810117--38651bddeab03ca251207953a6d1f054

+ 1 - 0
datasets/train_dev_opensubtitles/train/ja.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/Vg/v3/MD5E-s81431394--4d7d472cbea6cd74093005fb02d3790f/MD5E-s81431394--4d7d472cbea6cd74093005fb02d3790f

+ 1 - 0
datasets/train_dev_opensubtitles/train/pl.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/Qp/J3/MD5E-s33216341--cd691f241e1401ec2d2148f7922457e8/MD5E-s33216341--cd691f241e1401ec2d2148f7922457e8

+ 1 - 0
datasets/train_dev_opensubtitles/train/pt.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/3K/Vz/MD5E-s35268500--cb9c6784b5d9c397b1acf85fe62a069f/MD5E-s35268500--cb9c6784b5d9c397b1acf85fe62a069f

+ 1 - 0
datasets/train_dev_opensubtitles/train/sr.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/17/zW/MD5E-s30064150--a2cd6891c5356f12afdfc1187f89a036/MD5E-s30064150--a2cd6891c5356f12afdfc1187f89a036

+ 1 - 0
datasets/train_dev_opensubtitles/train/tr.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/Kg/k2/MD5E-s33675173--5de00ab7397c3cf808d7dcb6ab2f5115/MD5E-s33675173--5de00ab7397c3cf808d7dcb6ab2f5115

+ 1 - 0
datasets/train_dev_opensubtitles/words/da.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/P8/qq/MD5E-s40842094--5e414ba9258d198e4c16a418d21b67de/MD5E-s40842094--5e414ba9258d198e4c16a418d21b67de

+ 1 - 0
datasets/train_dev_opensubtitles/words/de.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/q1/g4/MD5E-s39490897--cad7d85081ffde5ef56405d24c68c682/MD5E-s39490897--cad7d85081ffde5ef56405d24c68c682

+ 1 - 0
datasets/train_dev_opensubtitles/words/en.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/Fq/pZ/MD5E-s57888856--a82069bcea0a269d92cbbbbe672a4d88/MD5E-s57888856--a82069bcea0a269d92cbbbbe672a4d88

+ 1 - 0
datasets/train_dev_opensubtitles/words/es.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/f5/14/MD5E-s41007348--440c56d371eb18aca5ce834982c43aad/MD5E-s41007348--440c56d371eb18aca5ce834982c43aad

+ 1 - 0
datasets/train_dev_opensubtitles/words/et.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/Zq/1W/MD5E-s41552139--97c3f7451c3512dd06e3ae273490ba44/MD5E-s41552139--97c3f7451c3512dd06e3ae273490ba44

+ 1 - 0
datasets/train_dev_opensubtitles/words/eu.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/f0/p4/MD5E-s26601340--e0f57b293612bd2424307139fe8a189b/MD5E-s26601340--e0f57b293612bd2424307139fe8a189b

+ 1 - 0
datasets/train_dev_opensubtitles/words/fr.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/Xw/4g/MD5E-s38045436--ba10d5a625c23d52e3a006acff8e5f93/MD5E-s38045436--ba10d5a625c23d52e3a006acff8e5f93

+ 1 - 0
datasets/train_dev_opensubtitles/words/ja.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/Qk/wk/MD5E-s76292553--606db16d9750d6dd927c3c96ccfb3f3e/MD5E-s76292553--606db16d9750d6dd927c3c96ccfb3f3e

+ 1 - 0
datasets/train_dev_opensubtitles/words/pl.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/0Q/4V/MD5E-s81519425--54c4f3d055c65aaf52d17eeedd2f5104/MD5E-s81519425--54c4f3d055c65aaf52d17eeedd2f5104

+ 1 - 0
datasets/train_dev_opensubtitles/words/pt.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/3V/m7/MD5E-s50411350--1d515b5fb8f071aa67d32a6d16d68f1b/MD5E-s50411350--1d515b5fb8f071aa67d32a6d16d68f1b

+ 1 - 0
datasets/train_dev_opensubtitles/words/sr.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/6M/8P/MD5E-s40816898--31575a0edbb081e58f31f0c367bb7f64/MD5E-s40816898--31575a0edbb081e58f31f0c367bb7f64

+ 1 - 0
datasets/train_dev_opensubtitles/words/tr.one_sentence_per_line

@@ -0,0 +1 @@
+../../../.git/annex/objects/53/j5/MD5E-s64456933--7ba9d1f86839b0e5da308d2809d8f4eb/MD5E-s64456933--7ba9d1f86839b0e5da308d2809d8f4eb

+ 157 - 0
extra/languages_to_download_informations.yaml

@@ -0,0 +1,157 @@
+da:
+  espeak_language_id: da
+  language_name: Danish
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  urls:
+  - https://childes.talkbank.org/data/Scandinavian/Danish/Plunkett.zip
+de:
+  espeak_language_id: de
+  language_name: German
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  - Grandfather
+  - Grandmother
+  urls:
+  - https://childes.talkbank.org/data/German/Caroline.zip
+  - https://childes.talkbank.org/data/German/Password/Leo.zip
+  - https://childes.talkbank.org/data/German/Password/Rigol.zip
+  - https://childes.talkbank.org/data/German/Wagner.zip
+en:
+  espeak_language_id: en-us
+  language_name: English
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  - Grandfather
+  - Grandmother
+  urls:
+  - https://phonbank.talkbank.org/data/Eng-NA/Providence.zip
+  - https://childes.talkbank.org/data/Eng-NA/Warren.zip
+  - https://childes.talkbank.org/data/Eng-NA/Brown.zip
+es:
+  espeak_language_id: es
+  language_name: Spanish
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  - Grandfather
+  - Grandmother
+  urls:
+  - https://childes.talkbank.org/data/Spanish/OreaPine.zip
+  - https://childes.talkbank.org/data/Spanish/Aguirre.zip
+  - https://childes.talkbank.org/data/Spanish/Vila.zip
+  - https://childes.talkbank.org/data/Spanish/Nieva.zip
+  - https://childes.talkbank.org/data/Spanish/Ornat.zip
+  - https://childes.talkbank.org/data/Spanish/Linaza.zip
+  - https://childes.talkbank.org/data/Spanish/BecaCESNo.zip
+et:
+  espeak_language_id: et
+  language_name: Estonian
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  - Grandfather
+  - Grandmother
+  urls:
+  - https://childes.talkbank.org/data/Other/Estonian/Argus.zip
+  - https://childes.talkbank.org/data/Other/Estonian/Beek.zip
+  - https://childes.talkbank.org/data/Other/Estonian/Kapanen.zip
+  - https://childes.talkbank.org/data/Other/Estonian/Kohler.zip
+  - https://childes.talkbank.org/data/Other/Estonian/Korgesaar.zip
+  - https://childes.talkbank.org/data/Other/Estonian/Kuett.zip
+  - https://childes.talkbank.org/data/Other/Estonian/Vija.zip
+  - https://childes.talkbank.org/data/Other/Estonian/Zupping.zip
+eu:
+  espeak_language_id: eu
+  language_name: Basque
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  - Grandfather
+  urls:
+  - https://childes.talkbank.org/data/Other/Basque/Soto.zip
+fr:
+  espeak_language_id: fr-fr
+  language_name: French
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  urls:
+  - https://phonbank.talkbank.org/data/French/Paris.zip
+  - https://phonbank.talkbank.org/data/French/Hunkeler.zip
+  - https://childes.talkbank.org/data/French/Pauline.zip
+  - https://phonbank.talkbank.org/data/French/Yamaguchi.zip
+  - https://childes.talkbank.org/data/French/Leveille.zip
+  - https://phonbank.talkbank.org/data/French/Lyon.zip
+ja:
+  espeak_language_id: ja
+  language_name: Japanese
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  - Grandfather
+  - Grandmother
+  urls:
+  - https://childes.talkbank.org/data/Japanese/Hamasaki.zip
+  - https://childes.talkbank.org/data/Japanese/Miyata.zip
+  - https://childes.talkbank.org/data/Japanese/Okayama.zip
+  - https://childes.talkbank.org/data/Japanese/Yokoyama.zip
+pl:
+  espeak_language_id: pl
+  language_name: Polish
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  - Grandfather
+  - Grandmother
+  urls:
+  - https://phonbank.talkbank.org/data/Slavic/Polish/WeistJarosz.zip
+  - https://childes.talkbank.org/data/Slavic/Polish/Szuman.zip
+pt:
+  espeak_language_id: pt
+  language_name: Portuguese
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  - Grandfather
+  - Grandmother
+  urls:
+  - https://phonbank.talkbank.org/data/Romance/Portuguese/CCF.zip
+  - https://childes.talkbank.org/data/Romance/Portuguese/Florianopolis.zip
+  - https://childes.talkbank.org/data/Romance/Portuguese/Santos.zip
+sr:
+  espeak_language_id: sr
+  language_name: Serbian
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  - Grandfather
+  - Grandmother
+  urls:
+  - https://childes.talkbank.org/data/Slavic/Serbian/SCECL.zip
+tr:
+  espeak_language_id: tr
+  language_name: Turkish
+  participants:
+  - Target_Child
+  - Mother
+  - Father
+  - Grandfather
+  - Grandmother
+  urls:
+  - https://childes.talkbank.org/data/Other/Turkish/Aksu.zip
+  - https://childes.talkbank.org/data/Other/Turkish/Altinkamis.zip

+ 59 - 0
extra/markers.json

@@ -0,0 +1,59 @@
+{"marker_to_delete": ["\\@b", 
+                    "\\@c", 
+                    "\\@d", 
+                    "\\@e", 
+                    "\\@f", 
+                    "\\@fs", 
+                    "\\@fp", 
+                    "\\@g", 
+                    "\\@i", 
+                    "\\@k", 
+                    "\\@l", 
+                    "\\@n", 
+                    "\\@si", 
+                    "\\@sas", 
+                    "\\@t", 
+                    "\\@u", 
+                    "\\@wp", 
+                    "\\@z", 
+                    "\\&\\+", 
+                    "\\&\\-", 
+                    "\\&", 
+                    "\\(\\.\\)", 
+                    "\\(\\..\\)", 
+                    "\\(\\...\\)", 
+                    "\\(\\....\\)", 
+                    "\\@o", "\\:", 
+                    "\u02c8", 
+                    "\\.", 
+                    "\u02cc", 
+                    " ", 
+                    "\\^", 
+                    "\\|", 
+                    "\\||", 
+                    "\\+"], 
+    "word_contains_delete": 
+                    ["@a", 
+                    "@sl",
+                    "@x", 
+                    "0"], 
+    "poncts_to_delete": ["\\#", 
+                        "\\@", 
+                        "%", 
+                        "\\^", 
+                        "\\&", 
+                        "\\{", 
+                        "\\}", 
+                        "\\$", 
+                        "\\=", 
+                        "\\,", 
+                        "\\.", 
+                        "\\!", 
+                        "\\?",
+                        "˞",
+                        "\\̩",
+                        "\\̃",
+                        "\\ˈ",
+                        "\\ʰ",
+                        "\\›",
+                        "\\‹"]}