Browse Source

[DATALAD] Recorded changes

Lucas Gautheron 8 months ago
parent
commit
06df10583f
5 changed files with 1368 additions and 1 deletions
  1. 329 0
      code/models/dev_siblings.stan
  2. 28 1
      code/models/enumeration.py
  3. 63 0
      code/preprocessing/siblings.py
  4. 841 0
      input/aclew_md.csv
  5. 107 0
      input/siblings.csv

+ 329 - 0
code/models/dev_siblings.stan

@@ -0,0 +1,329 @@
+functions {
+    real confusion_model_lpmf(array[] int group, // sliced reduce_sum argument: group index of clips start..end
+        int start, int end, // range of clip indices covered by this slice
+        int n_classes,
+        array[,] int vtc, // detected counts, indexed [clip, detected class]
+        array[,] int truth, // reference ("truth") counts, indexed [clip, true class]
+        array[] real age, // not used in the body
+        array[] real clip_duration, // multiplies the false-positive rate of each clip
+        array[] matrix lambda, // per-group rates, indexed [group, true class, detected class]
+        array[] vector lambda_fp // per-group false-positive rates, indexed [group, detected class]
+    ) {
+        real ll = 0;
+        vector [4] bp; // log-probability of the count contributed by each of the 4 true classes
+        real lambda_chi; // declared but never used
+
+        vector[16384] log_contrib_comb; // one slot per admissible combination (fixed capacity)
+        int n = size(log_contrib_comb);
+
+        for (k in start:end) {
+            for (i in 1:n_classes) {
+                log_contrib_comb[:n] = rep_vector(0, n); // clear the slots used by the previous (k, i)
+                n = 1; // next free slot
+
+                for (chi in 0:(truth[k,1]>0?max(truth[k,1], vtc[k,i]):0)) { // detections of class i caused by true class 1 (only 0 when none occurred)
+                    bp[1] = truth[k,1]==0?0:poisson_lpmf(chi | truth[k,1]*lambda[group[k-start+1],1,i]);
+
+                    for (och in 0:(truth[k,2]>0?max(truth[k,2], vtc[k,i]-chi):0)) { // same for true class 2
+                        bp[2] = truth[k,2]==0?0:poisson_lpmf(och | truth[k,2]*lambda[group[k-start+1],2,i]);
+
+                        for (fem in 0:(truth[k,3]>0?max(truth[k,3], vtc[k,i]-chi-och):0)) { // same for true class 3
+                            bp[3] = truth[k,3]==0?0:poisson_lpmf(fem | truth[k,3]*lambda[group[k-start+1],3,i]);
+
+                            for (mal in 0:(truth[k,4]>0?max(truth[k,4], vtc[k,i]-chi-och-fem):0)) { // same for true class 4
+                                bp[4] = truth[k,4]==0?0:poisson_lpmf(mal | truth[k,4]*lambda[group[k-start+1],4,i]);
+
+                                int delta = vtc[k,i] - (mal+fem+och+chi); // detections left over, attributed to false positives
+                                if (delta >= 0) { // combinations exceeding the observed count are skipped
+                                    log_contrib_comb[n] += sum(bp);
+                                    log_contrib_comb[n] += poisson_lpmf(
+                                        delta | lambda_fp[group[k-start+1],i]*clip_duration[k]
+                                    );
+                                    n = n+1;
+                                }
+                            }
+                        }
+                    }
+                }
+                if (n>1) {
+                    ll += log_sum_exp(log_contrib_comb[1:n-1]); // marginalize over every stored combination
+                }
+            }
+        }
+        return ll;
+    }
+
+    real model_lpmf(array[] int children,
+        int start, int end,
+        int n_recs,
+        int n_classes,
+        real duration,
+        array [,] int vocs,
+        array [] real age,
+        matrix truth_vocs,
+        array [] matrix actual_confusion,
+        array [] vector actual_fp_rate
+        ) {
+            // Partial log-density of recordings start..end, summed by reduce_sum.
+            real log_lik = 0;
+            vector [4] mu; // expected detected counts per class
+
+            for (rec in start:end) {
+                // start from zero for every recording
+                mu = rep_vector(0, 4);
+
+                for (c in 1:n_classes) {
+                    // true counts weighted by the confusion rates into class c, plus false positives
+                    mu[c] = dot_product(truth_vocs[rec,:], actual_confusion[rec,:,c])
+                        + actual_fp_rate[rec,c] * duration;
+                }
+                // normal likelihood whose variance equals its mean
+                log_lik += normal_lpdf(vocs[rec,:] | mu, sqrt(mu));
+            }
+
+            return log_lik;
+        }
+}
+
+// TODO
+// use speech rates to set priors on truth_vocs
+data {
+    int<lower=1> n_classes; // number of classes
+
+    // analysis data block
+    int<lower=1> n_recs; // number of recordings
+    int<lower=1> n_children; // number of children
+
+    array[n_recs] int<lower=1> children; // child index of each recording
+    array[n_recs] real<lower=1> age; // age at each recording (the model divides it by 12; presumably months -- confirm)
+    array[n_recs] int<lower=-1> siblings; // sibling count per recording; only ">0" is tested (-1 presumably means unknown -- confirm)
+    array[n_recs, n_classes] int<lower=0> vocs; // detected counts per recording and class
+    array[n_children] int<lower=1> corpus; // corpus index of each child
+
+    real<lower=0> recs_duration; // single duration applied to every recording
+
+    // speaker confusion data block
+    int<lower=1> n_clips;   // number of clips
+    int<lower=1> n_groups; // number of groups
+    int<lower=1> n_corpora;
+    array [n_clips] int group; // group index of each clip
+    array [n_clips] int conf_corpus; // not referenced in this model
+    array [n_clips,n_classes] int<lower=0> vtc_total; // vtc vocs attributed to specific speakers
+    array [n_clips,n_classes] int<lower=0> truth_total; // reference counts per clip and class
+    array [n_clips] real<lower=0> clip_duration;
+    array [n_clips] real<lower=0> clip_age;
+
+    int<lower=0> n_validation; // not referenced in this model
+
+    // actual speech rates
+    int<lower=1> n_rates; // number of speech-rate rows
+    int<lower=1> n_speech_rate_children; // number of children among those rows
+
+    array [n_rates,n_classes] int<lower=0> speech_rates; // observed counts per row and class
+    array [n_rates] int group_corpus; // corpus index of each row
+    array [n_rates] real<lower=0> durations; // duration of each row (multiplied by 1000 in the likelihood)
+    array [n_rates] real<lower=0> speech_rate_age; // age for each row, same scale as `age`
+    array [n_rates] int<lower=-1> speech_rate_siblings; // sibling count per row, same coding as `siblings`
+    array [n_rates] int<lower=1,upper=n_speech_rate_children> speech_rate_child; // child index of each row
+
+    // parallel processing
+    int<lower=1> threads; // used to derive the reduce_sum grainsize
+}
+
+transformed data {
+    vector<lower=0>[n_groups] recording_age; // clip age, indexed by group
+    array[n_speech_rate_children] int<lower=1> speech_rate_child_corpus;
+
+    array[n_children] int<lower=-1> child_siblings;
+    array[n_speech_rate_children] int<lower=-1> speech_rate_child_siblings;
+
+    // one value per group: later clips of a group overwrite earlier ones
+    for (clip in 1:n_clips) {
+        recording_age[group[clip]] = clip_age[clip];
+    }
+
+    // one value per child: later recordings of a child overwrite earlier ones
+    for (rec in 1:n_recs) {
+        child_siblings[children[rec]] = siblings[rec];
+    }
+
+    // same collapse for the speech-rate rows (corpus and sibling count per child)
+    for (row in 1:n_rates) {
+        speech_rate_child_corpus[speech_rate_child[row]] = group_corpus[row];
+        speech_rate_child_siblings[speech_rate_child[row]] = speech_rate_siblings[row];
+    }
+}
+
+parameters {
+    matrix<lower=0>[n_children,n_classes-1] mu_child_level; // per-child mean rates for classes 2..n_classes
+    vector [n_children] child_dev_age; // per-child coefficient on age in the class-1 mean
+    matrix<lower=0> [n_recs, n_classes] truth_vocs; // latent true counts per recording and class
+
+    // nuisance parameters
+    array [n_recs] matrix<lower=0>[n_classes,n_classes] actual_confusion_baseline; // per-recording confusion rates
+    array [n_recs] vector<lower=0>[n_classes] actual_fp_rate; // per-recording false-positive rates
+
+    // confusion parameters
+    // confusion matrix
+    matrix<lower=0>[n_classes,n_classes] alphas; // gamma shapes of the confusion rates
+    matrix<lower=0>[n_classes,n_classes] mus; // gamma means of the confusion rates
+    array [n_groups] matrix<lower=0>[n_classes,n_classes] lambda; // per-group confusion rates
+    // false positives
+    vector<lower=0>[n_classes] alphas_fp; // gamma shapes of the false-positive rates
+    vector<lower=0>[n_classes] mus_fp; // gamma means of the false-positive rates
+    array [n_groups] vector<lower=0>[n_classes] lambda_fp; // per-group false-positive rates
+
+    // speech rates
+    vector<lower=0>[n_classes] alpha_child_level; // variance across recordings for a given child
+    matrix<lower=0>[n_classes-1,n_corpora] alpha_corpus_level; // variance among children
+    matrix<lower=0>[n_classes-1,n_corpora] mu_corpus_level; // child-level average
+    vector<lower=0>[n_classes-1] alpha_pop_level; // variance among corpora
+    vector<lower=0>[n_classes] mu_pop_level; // population level averages
+    vector<lower=0>[n_classes-1] alpha_pop; // prior mean of alpha_corpus_level
+    matrix<lower=0>[n_classes,n_rates] speech_rate; // truth speech rates observed in annotated clips
+    matrix<lower=0>[n_speech_rate_children,n_classes-1] speech_rate_child_level; // expected speech rate at the child-level
+    real<lower=0> beta_sib_och; // effect of n of siblings on OCH speech
+
+    vector [n_speech_rate_children] child_dev_speech_age; // counterpart of child_dev_age for the speech-rate children
+
+    // average effect of age
+    real alpha_dev; // mean of the per-child age coefficients
+    real<lower=0> sigma_dev; // standard deviation of the per-child age coefficients
+
+    // effect of excess ADU input
+    real beta_dev;
+}
+
+model {
+    //actual model
+
+    target += reduce_sum(
+       model_lpmf, children, 1,
+       n_recs, n_classes, recs_duration,
+       vocs, age,
+       truth_vocs, actual_confusion_baseline, actual_fp_rate
+    );
+
+    for (k in 1:n_recs) {
+        for (i in 1:n_classes) {
+            if (i == 1) {
+                actual_confusion_baseline[k,i] ~ gamma(alphas[i,:], alphas[i,:]./mus[i,:]);
+                //actual_confusion_baseline[k,i] ~ gamma(alphas[i,:], alphas[i,:]./(mus[i,:].*exp(delta_chi_age'*age[k]/12.0))); //'
+            }
+            else {
+                actual_confusion_baseline[k,i] ~ gamma(alphas[i,:], alphas[i,:]./mus[i,:]);
+            }
+        }
+        actual_fp_rate[k] ~ gamma(alphas_fp, alphas_fp./mus_fp);
+    }
+    // priors on the latent true counts, rescaled by 1000*recs_duration
+
+    for (k in 1:n_recs) {
+        real chi_mu = exp(
+            log(mu_pop_level[1]) + child_dev_age[children[k]]*age[k]/12.0/10.0+beta_dev*(mu_child_level[children[k],2]+mu_child_level[children[k],3]-mu_pop_level[3]-mu_pop_level[4])*age[k]/12.0/10.0
+        );
+        (truth_vocs[k,1]/1000/recs_duration) ~ gamma(
+            alpha_child_level[1],
+            alpha_child_level[1]/chi_mu
+        );
+
+        real och_mu = exp(
+            log(mu_child_level[children[k],1]) + (child_siblings[children[k]]>0?beta_sib_och:0)
+        );
+        (truth_vocs[k,2]/1000/recs_duration) ~ gamma(
+            alpha_child_level[2],
+            alpha_child_level[2]/och_mu
+        );
+
+        (truth_vocs[k,3:]/1000/recs_duration) ~ gamma(
+            alpha_child_level[3:], alpha_child_level[3:]./mu_child_level[children[k],2:]' //' rate built from the same shapes [3:] so both operands of ./ have equal size
+        );
+    }
+
+    for (c in 1:n_children) {
+        mu_child_level[c] ~ gamma(
+            alpha_corpus_level[:,corpus[c]],
+            (alpha_corpus_level[:,corpus[c]]./mu_corpus_level[:,corpus[c]])
+        );
+    }
+
+    alpha_child_level ~ gamma(2,1);
+
+    target += reduce_sum(
+        confusion_model_lpmf, group, max(1, n_clips%/%(threads*4)), // grainsize clamped so it is never 0
+        n_classes,
+        vtc_total, truth_total, clip_age, clip_duration, // order follows the signature: (..., age, clip_duration, ...)
+        lambda, lambda_fp
+    );
+
+    mus_fp ~ exponential(1);
+    alphas_fp ~ gamma(2, 1);
+
+    for (i in 1:n_classes) {
+        lambda_fp[:,i] ~ gamma(alphas_fp[i], alphas_fp[i]/mus_fp[i]);
+
+        for (j in 1:n_classes) {
+            mus[i,j] ~ exponential(i==j?2:8);
+            alphas[i,j] ~ gamma(2,1);
+            for (c in 1:n_groups) {
+                if (i==1) {
+                    lambda[c,i,j] ~ gamma(alphas[i,j], alphas[i,j]/mus[i,j]);
+                    //lambda[c,i,j] ~ gamma(alphas[i,j], alphas[i,j]/(mus[i,j]*exp(delta_chi_age[j]*recording_age[c]/12.0)));
+                }
+                else {
+                    lambda[c,i,j] ~ gamma(alphas[i,j], alphas[i,j]/mus[i,j]);
+                }
+            }
+        }
+    }
+    //delta_chi_age ~ normal(0, 0.1);
+
+    // speech rates
+    mu_pop_level ~ exponential(4);
+    alpha_pop_level ~ gamma(8, 4);
+    alpha_pop ~ gamma(10, 10);
+    for (i in 1:n_classes-1) {
+        alpha_corpus_level[i,:] ~ gamma(4, 4/alpha_pop[i]);
+        mu_corpus_level[i,:] ~ gamma(alpha_pop_level[i],alpha_pop_level[i]/mu_pop_level[i+1]);
+    }
+
+    for (g in 1:n_rates) {
+        real chi_mu = exp(
+            log(mu_pop_level[1]) + child_dev_speech_age[speech_rate_child[g]]*speech_rate_age[g]/12.0/10.0 + beta_dev*(speech_rate_child_level[speech_rate_child[g],2]+speech_rate_child_level[speech_rate_child[g],3]-mu_pop_level[3]-mu_pop_level[4])*speech_rate_age[g]/12.0/10.0
+        );
+        speech_rate[1,g] ~ gamma(
+            alpha_child_level[1],
+            alpha_child_level[1]/chi_mu
+        );
+
+        real och_mu = exp(
+            log(speech_rate_child_level[speech_rate_child[g],1]) + (speech_rate_child_siblings[speech_rate_child[g]]>0?beta_sib_och:0)
+        );
+        speech_rate[2,g] ~ gamma(
+            alpha_child_level[2],
+            alpha_child_level[2]/och_mu
+        );
+
+        speech_rate[3:,g] ~ gamma(
+            alpha_child_level[3:],
+            (alpha_child_level[3:]./(speech_rate_child_level[speech_rate_child[g],2:]')) //'
+        );
+        speech_rates[g,:] ~ poisson(speech_rate[:,g]*durations[g]*1000);
+    }
+
+    for (c in 1:n_speech_rate_children) {
+        speech_rate_child_level[c,:] ~ gamma(
+            alpha_corpus_level[:,speech_rate_child_corpus[c]],
+            (alpha_corpus_level[:,speech_rate_child_corpus[c]]./(mu_corpus_level[:,speech_rate_child_corpus[c]]))
+        );
+    }
+
+    child_dev_age ~ normal(alpha_dev, sigma_dev);
+    child_dev_speech_age ~ normal(alpha_dev, sigma_dev);
+    beta_sib_och ~ exponential(1);
+
+    alpha_dev ~ normal(0, 1);
+    sigma_dev ~ exponential(1);
+
+    beta_dev ~ normal(0, 1);
+}

+ 28 - 1
code/models/enumeration.py

@@ -27,6 +27,8 @@ matplotlib.rcParams.update(
     }
 )
 
+from collections import defaultdict
+
 import pickle
 
 import datalad.api
@@ -69,6 +71,18 @@ def extrude(self, removed, mode: str = "intersection"):
 
     return self.crop(truncating_support, mode=mode)
 
+def children_siblings(corpus):
+    siblings = pd.read_csv("input/siblings.csv")
+    siblings = siblings[siblings["corpus"]==corpus].set_index("child_id")
+    siblings = siblings["n_siblings"].to_dict()
+
+    n = defaultdict(-1)
+    for c in siblings:
+        n[c] = siblings[c]
+
+    return n
+
+
 
 def compute_counts(parameters):
     corpus = parameters["corpus"]
@@ -201,7 +215,12 @@ def rates(parameters):
     metrics = pipeline.extract()
     metrics = pd.DataFrame(metrics).assign(corpus=corpus, annotator=annotator)
     project.recordings["age"] = project.compute_ages()
-    metrics = metrics.merge(project.recordings[["recording_filename", "age"]])
+    project.recordings["siblings"] = project.recordings.child_id.map(
+        children_siblings(corpus)
+    )
+    metrics = metrics.merge(
+        project.recordings[["recording_filename", "age", "siblings"]]
+    )
     metrics["duration"] = metrics[f"duration_{annotator}"] / 1000 / 3600
     metrics = metrics[metrics["duration"] > 0.01]
     metrics["child"] = corpus + "_" + metrics["child_id"].astype(str)
@@ -261,6 +280,9 @@ def compile_recordings(corpus):
     am.read()
 
     project.recordings["age"] = project.compute_ages()
+    project.recordings["siblings"] = project.recordings.child_id.map(
+        children_siblings(corpus)
+    )
 
     annotations = am.annotations[am.annotations["set"] == "vtc"]
     annotations = annotations.merge(
@@ -292,6 +314,7 @@ def compile_recordings(corpus):
         child_id = _annotations["child_id"].max()
         age = _annotations["age"].max()
         duration = (_annotations["range_offset"] - _annotations["range_onset"]).sum()
+        siblings = _annotations["siblings"].max()
 
         if duration < args.duration * 3600 * 1000:
             continue
@@ -331,6 +354,7 @@ def compile_recordings(corpus):
         rec["children"] = f"{corpus}_{child_id}"
         rec["corpus"] = basename(corpus)
         rec["age"] = age
+        rec["siblings"] = siblings
 
         recs.append(rec)
 
@@ -372,6 +396,7 @@ if __name__ == "__main__":
         [speech_rates[f"speech_rate_{i}"].values for i in range(4)]
     )
     speech_rate_age = speech_rates["age"].values
+    speech_rate_siblings = speech_rates["siblings"].values.astype(int)
 
     speech_rates.to_csv("rates.csv")
 
@@ -393,6 +418,7 @@ if __name__ == "__main__":
         "n_unique_clips": data["clip_id"].nunique(),
         "speech_rates": speech_rate_matrix.astype(int),
         "speech_rate_age": speech_rate_age,
+        "speech_rate_siblings": speech_rate_siblings,
         "group_corpus": (
             1 + speech_rates["corpus"].map(corpora_map).astype(int).values
         ),
@@ -415,6 +441,7 @@ if __name__ == "__main__":
         "children": recs["children"],
         "vocs": np.transpose([recs[f"vtc_{i}"].values for i in range(4)]),
         "age": recs["age"],
+        "siblings": recs["siblings"].astype(int),
         "corpus": children_corpus,
         "recs_duration": args.duration,
     }

+ 63 - 0
code/preprocessing/siblings.py

@@ -0,0 +1,63 @@
+import pandas as pd 
+
+from ChildProject.projects import ChildProject
+
+from os.path import join as opj, basename
+
+corpora = [
+    "input/bergelson",
+    "input/warlaumont",
+    "input/winnipeg",
+    "input/lucid"
+]
+
+dic = {
+    "input/bergelson": "confidential/original/bergelson_dict.csv",
+    "input/lucid": "confidential/original/lucid_dict.csv",
+    "input/warlaumont": "original/warlaumont_dict_matched.csv",
+    "input/winnipeg": "confidential/original/winnipeg_dict_matched.csv"
+}
+
+correspondance = {
+    "BER": "input/bergelson",
+    "ROW": "input/lucid",
+    "SOD": "input/winnipeg",
+    "WAR": "input/warlaumont"
+}
+
+projects = [ 
+    ChildProject(corpus) for corpus in corpora
+]
+
+for project in projects:
+    project.read()
+
+recordings = pd.concat([
+    projects[i].recordings.assign(corpus=corpus)
+    for i, corpus in enumerate(corpora)
+])
+
+recordings["its_filename"] = recordings["its_filename"].str.replace(".its", "")
+
+aclew_id = pd.concat([
+    pd.read_csv(opj(corpus, "metadata", dic[corpus])).assign(corpus=corpus)
+    for corpus in corpora
+])
+
+aclew_id["its"] = aclew_id["its"].str.replace(".its", "")
+
+
+aclew_md = pd.read_csv("input/aclew_md.csv")
+
+recordings = recordings[["corpus", "child_id", "recording_filename", "its_filename"]].merge(
+    aclew_id[["corpus", "its", "aclew_id"]],
+    how="inner",
+    left_on=["corpus", "its_filename"],
+    right_on=["corpus", "its"]
+)
+
+recordings = recordings.merge(aclew_md, how="inner", left_on="aclew_id", right_on="aclew_id")
+children = recordings.groupby(["corpus", "child_id"]).agg(n_siblings=("number_older_sibs", "max"))
+children = children.reset_index()
+children["corpus"] = children.corpus.map(basename)
+children.to_csv("input/siblings.csv")

+ 841 - 0
input/aclew_md.csv

@@ -0,0 +1,841 @@
+"labname","aclew_id","child_level_id","number_older_sibs"
+"BER",6265,"06",0
+"BER",1227,"01",1
+"BER",5510,"01",1
+"BER",1982,"01",1
+"BER",3439,"01",1
+"BER",8712,"01",1
+"BER",5813,"01",1
+"BER",7979,"01",1
+"BER",4890,"01",1
+"BER",1445,"01",1
+"BER",2224,"01",1
+"BER",9866,"01",1
+"BER",9061,"02",1
+"BER",3483,"02",1
+"BER",673,"02",1
+"BER",5388,"02",1
+"BER",4330,"02",1
+"BER",3832,"02",1
+"BER",7209,"02",1
+"BER",8307,"02",1
+"BER",1836,"02",1
+"BER",8757,"02",1
+"BER",2756,"02",1
+"BER",9306,"02",1
+"BER",4524,"07",0
+"BER",7780,"03",1
+"BER",9470,"03",1
+"BER",850,"03",1
+"BER",4164,"03",1
+"BER",6245,"03",1
+"BER",1843,"03",1
+"BER",8204,"03",1
+"BER",7810,"03",1
+"BER",256,"03",1
+"BER",3194,"08",0
+"BER",8262,"04",4
+"BER",3895,"04",4
+"BER",5102,"04",4
+"BER",9363,"04",4
+"BER",9947,"04",4
+"BER",3657,"04",4
+"BER",3798,"04",4
+"BER",2101,"04",4
+"BER",1234,"04",4
+"BER",595,"04",4
+"BER",6112,"04",4
+"BER",6661,"09",0
+"BER",4889,"06",0
+"BER",5034,"06",0
+"BER",8638,"06",0
+"BER",1104,"06",0
+"BER",2471,"06",0
+"BER",9513,"06",0
+"BER",3256,"06",0
+"BER",9123,"06",0
+"BER",6016,"06",0
+"BER",8735,"06",0
+"BER",9685,"06",0
+"BER",2755,"11",0
+"BER",713,"07",0
+"BER",8191,"07",0
+"BER",964,"07",0
+"BER",4912,"07",0
+"BER",5192,"07",0
+"BER",291,"07",0
+"BER",4806,"07",0
+"BER",486,"07",0
+"BER",3455,"07",0
+"BER",7996,"07",0
+"BER",6937,"07",0
+"BER",7563,"12",0
+"BER",3916,"08",0
+"BER",3777,"08",0
+"BER",4780,"08",0
+"BER",8109,"08",0
+"BER",6563,"08",0
+"BER",6035,"08",0
+"BER",1137,"08",0
+"BER",712,"08",0
+"BER",8797,"08",0
+"BER",6049,"08",0
+"BER",7870,"08",0
+"BER",1596,"16",0
+"BER",4180,"09",0
+"BER",5976,"09",0
+"BER",9039,"09",0
+"BER",5157,"09",0
+"BER",6018,"09",0
+"BER",2470,"09",0
+"BER",1401,"09",0
+"BER",3063,"09",0
+"BER",9453,"09",0
+"BER",1075,"09",0
+"BER",7758,"09",0
+"BER",9244,"19",0
+"BER",1575,"10",1
+"BER",3702,"10",1
+"BER",5228,"10",1
+"BER",414,"10",1
+"BER",9036,"10",1
+"BER",5242,"10",1
+"BER",6200,"10",1
+"BER",1310,"10",1
+"BER",6441,"10",1
+"BER",5622,"10",1
+"BER",4473,"10",1
+"BER",1416,"20",0
+"BER",6957,"11",0
+"BER",466,"11",0
+"BER",2858,"11",0
+"BER",2016,"11",0
+"BER",8595,"11",0
+"BER",6954,"11",0
+"BER",1414,"11",0
+"BER",3749,"11",0
+"BER",5777,"11",0
+"BER",1405,"11",0
+"BER",7392,"11",0
+"BER",980,"21",0
+"BER",5980,"12",0
+"BER",1665,"12",0
+"BER",8922,"12",0
+"BER",9487,"12",0
+"BER",5606,"12",0
+"BER",485,"12",0
+"BER",4622,"12",0
+"BER",9392,"12",0
+"BER",7324,"12",0
+"BER",5706,"12",0
+"BER",284,"12",0
+"BER",5092,"22",0
+"BER",3949,"13",2
+"BER",9310,"13",2
+"BER",2075,"13",2
+"BER",1208,"13",2
+"BER",9863,"13",2
+"BER",174,"13",2
+"BER",2876,"13",2
+"BER",1395,"13",2
+"BER",1904,"13",2
+"BER",4706,"13",2
+"BER",4703,"13",2
+"BER",6249,"25",0
+"BER",6023,"14",1
+"BER",5430,"14",1
+"BER",5483,"14",1
+"BER",6174,"14",1
+"BER",4731,"14",1
+"BER",226,"14",1
+"BER",2927,"14",1
+"BER",2580,"14",1
+"BER",3024,"14",1
+"BER",3395,"14",1
+"BER",1698,"14",1
+"BER",1620,"27",0
+"BER",7269,"15",2
+"BER",6433,"15",2
+"BER",2615,"15",2
+"BER",7377,"15",2
+"BER",2345,"15",2
+"BER",7175,"15",2
+"BER",4280,"15",2
+"BER",2418,"15",2
+"BER",7239,"15",2
+"BER",5778,"15",2
+"BER",8147,"15",2
+"BER",6402,"28",0
+"BER",8246,"16",0
+"BER",774,"16",0
+"BER",7829,"16",0
+"BER",1777,"16",0
+"BER",1931,"16",0
+"BER",395,"16",0
+"BER",2191,"16",0
+"BER",4032,"16",0
+"BER",7115,"16",0
+"BER",2779,"16",0
+"BER",9692,"16",0
+"BER",8742,"29",0
+"BER",6267,"17",1
+"BER",3641,"17",1
+"BER",8319,"17",1
+"BER",4172,"17",1
+"BER",1889,"17",1
+"BER",1500,"17",1
+"BER",1458,"17",1
+"BER",3654,"17",1
+"BER",2777,"17",1
+"BER",8688,"17",1
+"BER",5109,"17",1
+"BER",5454,"30",0
+"BER",8323,"18",1
+"BER",990,"18",1
+"BER",5938,"18",1
+"BER",9230,"18",1
+"BER",6471,"18",1
+"BER",9997,"18",1
+"BER",2852,"18",1
+"BER",3538,"18",1
+"BER",2348,"18",1
+"BER",52,"18",1
+"BER",7011,"18",1
+"BER",532,"33",0
+"BER",131,"19",0
+"BER",5988,"19",0
+"BER",6499,"19",0
+"BER",7679,"19",0
+"BER",8786,"19",0
+"BER",2564,"19",0
+"BER",8020,"19",0
+"BER",3739,"19",0
+"BER",993,"19",0
+"BER",4536,"34",0
+"BER",6209,"20",0
+"BER",9756,"20",0
+"BER",459,"20",0
+"BER",9009,"20",0
+"BER",8491,"20",0
+"BER",9012,"20",0
+"BER",7604,"20",0
+"BER",6745,"20",0
+"BER",3068,"20",0
+"BER",3340,"20",0
+"BER",9779,"20",0
+"BER",3149,"35",0
+"BER",8629,"21",0
+"BER",7448,"21",0
+"BER",2405,"21",0
+"BER",4096,"21",0
+"BER",7099,"21",0
+"BER",9348,"21",0
+"BER",2447,"21",0
+"BER",4439,"21",0
+"BER",6201,"21",0
+"BER",5240,"21",0
+"BER",527,"21",0
+"BER",2795,"43",0
+"BER",6968,"22",0
+"BER",9454,"22",0
+"BER",7385,"22",0
+"BER",6935,"22",0
+"BER",7162,"22",0
+"BER",109,"22",0
+"BER",2318,"22",0
+"BER",8393,"22",0
+"BER",2197,"22",0
+"BER",7549,"22",0
+"BER",7041,"46",0
+"BER",2882,"23",4
+"BER",9077,"23",4
+"BER",5897,"23",4
+"BER",41,"23",4
+"BER",5074,"23",4
+"BER",7362,"23",4
+"BER",7523,"23",4
+"BER",9996,"23",4
+"BER",8770,"23",4
+"BER",3526,"23",4
+"BER",6604,"23",4
+"BER",9803,"01",1
+"BER",2474,"25",0
+"BER",1537,"25",0
+"BER",639,"25",0
+"BER",7505,"25",0
+"BER",3511,"25",0
+"BER",2970,"25",0
+"BER",8189,"25",0
+"BER",7465,"25",0
+"BER",7965,"25",0
+"BER",4626,"25",0
+"BER",6971,"25",0
+"BER",6047,"03",1
+"BER",1618,"26",1
+"BER",113,"26",1
+"BER",8948,"26",1
+"BER",9294,"26",1
+"BER",6586,"26",1
+"BER",4873,"26",1
+"BER",8049,"26",1
+"BER",4880,"26",1
+"BER",9409,"26",1
+"BER",293,"26",1
+"BER",1800,"26",1
+"BER",4572,"10",1
+"BER",8993,"27",0
+"BER",4301,"27",0
+"BER",240,"27",0
+"BER",6831,"27",0
+"BER",6938,"27",0
+"BER",9717,"27",0
+"BER",4291,"27",0
+"BER",4989,"27",0
+"BER",8655,"27",0
+"BER",1345,"27",0
+"BER",956,"27",0
+"BER",7462,"14",1
+"BER",820,"28",0
+"BER",5449,"28",0
+"BER",4644,"28",0
+"BER",1844,"28",0
+"BER",9762,"28",0
+"BER",2437,"28",0
+"BER",2253,"28",0
+"BER",7170,"28",0
+"BER",5694,"28",0
+"BER",4704,"28",0
+"BER",4675,"28",0
+"BER",7137,"17",1
+"BER",9770,"29",0
+"BER",6788,"29",0
+"BER",8885,"29",0
+"BER",6733,"29",0
+"BER",4482,"29",0
+"BER",4266,"29",0
+"BER",2880,"29",0
+"BER",8110,"29",0
+"BER",3048,"29",0
+"BER",1145,"29",0
+"BER",397,"29",0
+"BER",1805,"18",1
+"BER",145,"30",0
+"BER",2902,"30",0
+"BER",3548,"30",0
+"BER",833,"30",0
+"BER",4550,"30",0
+"BER",3758,"30",0
+"BER",9317,"30",0
+"BER",6205,"30",0
+"BER",4144,"30",0
+"BER",2330,"30",0
+"BER",176,"30",0
+"BER",5849,"26",1
+"BER",4988,"31",1
+"BER",9337,"31",1
+"BER",16,"31",1
+"BER",4966,"31",1
+"BER",9291,"31",1
+"BER",4726,"31",1
+"BER",4041,"31",1
+"BER",3991,"31",1
+"BER",1403,"31",1
+"BER",9241,"31",1
+"BER",5315,"31",1
+"BER",3135,"31",1
+"BER",7186,"32",2
+"BER",7270,"32",2
+"BER",1238,"32",2
+"BER",1668,"32",2
+"BER",9643,"32",2
+"BER",3662,"32",2
+"BER",404,"32",2
+"BER",7339,"32",2
+"BER",2941,"32",2
+"BER",6835,"32",2
+"BER",3994,"32",2
+"BER",5407,"37",1
+"BER",7643,"33",0
+"BER",5065,"33",0
+"BER",6143,"33",0
+"BER",1328,"33",0
+"BER",2379,"33",0
+"BER",56,"33",0
+"BER",4590,"33",0
+"BER",1760,"33",0
+"BER",955,"33",0
+"BER",4636,"33",0
+"BER",6315,"33",0
+"BER",4071,"38",1
+"BER",9411,"34",0
+"BER",7221,"34",0
+"BER",2744,"34",0
+"BER",4073,"34",0
+"BER",4520,"34",0
+"BER",6002,"34",0
+"BER",5906,"34",0
+"BER",4964,"34",0
+"BER",3778,"34",0
+"BER",65,"34",0
+"BER",9645,"34",0
+"BER",127,"41",1
+"BER",7066,"35",0
+"BER",2973,"35",0
+"BER",9973,"35",0
+"BER",4141,"35",0
+"BER",1862,"35",0
+"BER",6735,"35",0
+"BER",4679,"35",0
+"BER",818,"35",0
+"BER",7117,"35",0
+"BER",563,"35",0
+"BER",5187,"35",0
+"BER",2231,"42",1
+"BER",5750,"36",3
+"BER",9333,"36",3
+"BER",2948,"36",3
+"BER",7701,"36",3
+"BER",2048,"36",3
+"BER",1103,"36",3
+"BER",8594,"36",3
+"BER",2984,"36",3
+"BER",8263,"36",3
+"BER",2074,"36",3
+"BER",4534,"36",3
+"BER",8102,"13",2
+"BER",4127,"37",1
+"BER",9531,"37",1
+"BER",701,"37",1
+"BER",6258,"37",1
+"BER",4128,"37",1
+"BER",559,"37",1
+"BER",8790,"37",1
+"BER",6924,"37",1
+"BER",9993,"37",1
+"BER",4897,"37",1
+"BER",2410,"37",1
+"BER",5480,"15",2
+"BER",9998,"38",1
+"BER",5193,"38",1
+"BER",9457,"38",1
+"BER",6345,"38",1
+"BER",2519,"38",1
+"BER",7072,"38",1
+"BER",7953,"38",1
+"BER",6483,"38",1
+"BER",8510,"38",1
+"BER",4168,"38",1
+"BER",8991,"38",1
+"BER",4698,"32",2
+"BER",6495,"39",2
+"BER",571,"39",2
+"BER",8235,"39",2
+"BER",1849,"39",2
+"BER",4091,"39",2
+"BER",4182,"39",2
+"BER",2828,"39",2
+"BER",5991,"39",2
+"BER",2088,"39",2
+"BER",9701,"39",2
+"BER",1899,"39",2
+"BER",1550,"39",2
+"BER",677,"40",2
+"BER",9228,"40",2
+"BER",6997,"40",2
+"BER",8193,"40",2
+"BER",2517,"40",2
+"BER",1801,"40",2
+"BER",3447,"40",2
+"BER",777,"40",2
+"BER",4078,"40",2
+"BER",576,"40",2
+"BER",4839,"40",2
+"BER",5604,"40",2
+"BER",4886,"41",1
+"BER",7823,"41",1
+"BER",625,"41",1
+"BER",7619,"41",1
+"BER",4152,"41",1
+"BER",3405,"41",1
+"BER",1189,"41",1
+"BER",8069,"41",1
+"BER",5633,"41",1
+"BER",2024,"41",1
+"BER",5605,"41",1
+"BER",547,"36",3
+"BER",4503,"42",1
+"BER",8114,"42",1
+"BER",4823,"42",1
+"BER",2090,"42",1
+"BER",4528,"42",1
+"BER",5016,"42",1
+"BER",7025,"42",1
+"BER",6570,"42",1
+"BER",9892,"42",1
+"BER",6801,"42",1
+"BER",2489,"42",1
+"BER",2798,"44",3
+"BER",3663,"43",0
+"BER",6939,"43",0
+"BER",2476,"43",0
+"BER",4718,"43",0
+"BER",4051,"43",0
+"BER",7942,"43",0
+"BER",357,"43",0
+"BER",6479,"43",0
+"BER",396,"43",0
+"BER",8,"43",0
+"BER",3489,"43",0
+"BER",9369,"04",4
+"BER",3108,"44",3
+"BER",4755,"44",3
+"BER",1196,"44",3
+"BER",2261,"44",3
+"BER",5678,"44",3
+"BER",2200,"44",3
+"BER",7609,"44",3
+"BER",2417,"44",3
+"BER",2307,"44",3
+"BER",5267,"44",3
+"BER",8104,"44",3
+"BER",1713,"45",1
+"BER",6336,"45",1
+"BER",4604,"45",1
+"BER",21,"45",1
+"BER",6222,"45",1
+"BER",4596,"45",1
+"BER",8101,"45",1
+"BER",9541,"45",1
+"BER",5402,"45",1
+"BER",1306,"45",1
+"BER",6411,"45",1
+"BER",2242,"23",4
+"BER",5162,"46",0
+"BER",2446,"46",0
+"BER",1427,"46",0
+"BER",6457,"46",0
+"BER",6507,"46",0
+"BER",4714,"46",0
+"BER",2855,"46",0
+"BER",8222,"46",0
+"BER",2957,"46",0
+"BER",1330,"46",0
+"BER",4841,"46",0
+"ROW",8458,"C004",0
+"ROW",3664,"C004",0
+"ROW",7613,"C004",0
+"ROW",1875,"C004",0
+"ROW",7320,"C004",0
+"ROW",2099,"C004",0
+"ROW",7102,"C004",0
+"ROW",1916,"C006",1
+"ROW",5670,"C006",1
+"ROW",4009,"C006",1
+"ROW",3430,"C006",1
+"ROW",2006,"C006",1
+"ROW",6822,"C006",1
+"ROW",572,"C006",1
+"ROW",8080,"C008",2
+"ROW",6164,"C008",2
+"ROW",4955,"C008",2
+"ROW",8636,"C008",2
+"ROW",4877,"C008",2
+"ROW",2542,"C008",2
+"ROW",3825,"C008",2
+"ROW",3417,"C010",1
+"ROW",5644,"C010",1
+"ROW",1672,"C010",1
+"ROW",6276,"C010",1
+"ROW",6969,"C010",1
+"ROW",4224,"C010",1
+"ROW",5715,"C010",1
+"ROW",4913,"C012",0
+"ROW",5355,"C012",0
+"ROW",7090,"C012",0
+"ROW",1768,"C012",0
+"ROW",3350,"C012",0
+"ROW",1065,"C012",0
+"ROW",454,"C012",0
+"ROW",8807,"C016",1
+"ROW",3219,"C016",1
+"ROW",6970,"C016",1
+"ROW",1815,"C016",1
+"ROW",8669,"C016",1
+"ROW",7144,"C016",1
+"ROW",3486,"C016",1
+"ROW",1129,"C018",1
+"ROW",8738,"C018",1
+"ROW",9801,"C018",1
+"ROW",4431,"C018",1
+"ROW",3761,"C018",1
+"ROW",5289,"C018",1
+"ROW",2196,"C018",1
+"ROW",1081,"C019",0
+"ROW",2945,"C019",0
+"ROW",7980,"C019",0
+"ROW",4865,"C019",0
+"ROW",9408,"C019",0
+"ROW",8905,"C019",0
+"ROW",4425,"C023",2
+"ROW",7345,"C023",2
+"ROW",4132,"C023",2
+"ROW",7132,"C023",2
+"ROW",2925,"C023",2
+"ROW",948,"C023",2
+"ROW",7288,"C024",0
+"ROW",2516,"C024",0
+"ROW",2745,"C024",0
+"ROW",8034,"C024",0
+"ROW",2014,"C024",0
+"ROW",9497,"C024",0
+"ROW",7999,"C024",0
+"ROW",9245,"C025",0
+"ROW",3438,"C025",0
+"ROW",1132,"C025",0
+"ROW",9620,"C025",0
+"ROW",490,"C025",0
+"ROW",6339,"C025",0
+"ROW",2296,"C025",0
+"ROW",9269,"C026",2
+"ROW",7866,"C026",2
+"ROW",3628,"C026",2
+"ROW",3584,"C026",2
+"ROW",1156,"C026",2
+"ROW",7897,"C026",2
+"ROW",558,"C026",2
+"ROW",2759,"C029",0
+"ROW",9826,"C029",0
+"ROW",4148,"C029",0
+"ROW",7088,"C029",0
+"ROW",96,"C029",0
+"ROW",7148,"C029",0
+"ROW",9057,"C030",1
+"ROW",4816,"C030",1
+"ROW",2391,"C030",1
+"ROW",7437,"C030",1
+"ROW",1522,"C030",1
+"ROW",5536,"C030",1
+"ROW",8008,"C030",1
+"ROW",7413,"C035",0
+"ROW",6648,"C035",0
+"ROW",5600,"C035",0
+"ROW",1905,"C035",0
+"ROW",6834,"C035",0
+"ROW",3945,"C035",0
+"ROW",3988,"C035",0
+"ROW",5272,"C036",2
+"ROW",6431,"C036",2
+"ROW",4214,"C036",2
+"ROW",2298,"C036",2
+"ROW",9170,"C036",2
+"ROW",2041,"C036",2
+"ROW",3391,"C036",2
+"ROW",1680,"C037",0
+"ROW",5318,"C037",0
+"ROW",7765,"C037",0
+"ROW",3673,"C037",0
+"ROW",5182,"C037",0
+"ROW",66,"C037",0
+"ROW",6368,"C037",0
+"ROW",8431,"C038",0
+"ROW",3905,"C038",0
+"ROW",9498,"C038",0
+"ROW",2176,"C038",0
+"ROW",7585,"C038",0
+"ROW",1937,"C038",0
+"ROW",4630,"C038",0
+"ROW",270,"C040",0
+"ROW",4392,"C040",0
+"ROW",8105,"C040",0
+"ROW",5655,"C040",0
+"ROW",5903,"C040",0
+"ROW",2956,"C040",0
+"ROW",2271,"C040",0
+"ROW",3366,"C041",2
+"ROW",9775,"C041",2
+"ROW",8108,"C041",2
+"ROW",40,"C041",2
+"ROW",4011,"C041",2
+"ROW",8031,"C041",2
+"ROW",5818,"C041",2
+"ROW",2693,"C042",0
+"ROW",1102,"C042",0
+"ROW",668,"C042",0
+"ROW",2774,"C042",0
+"ROW",8299,"C042",0
+"ROW",3978,"C042",0
+"ROW",8421,"C042",0
+"ROW",8357,"C046",1
+"ROW",6027,"C046",1
+"ROW",8296,"C046",1
+"ROW",6133,"C046",1
+"ROW",908,"C046",1
+"ROW",8083,"C046",1
+"ROW",8041,"C046",1
+"ROW",112,"C047",0
+"ROW",2834,"C047",0
+"ROW",2731,"C047",0
+"ROW",2144,"C047",0
+"ROW",8044,"C047",0
+"ROW",5931,"C047",0
+"ROW",7468,"C047",0
+"ROW",1324,"C049",1
+"ROW",2671,"C049",1
+"ROW",9074,"C049",1
+"ROW",3583,"C049",1
+"ROW",2400,"C049",1
+"ROW",4602,"C049",1
+"ROW",5368,"C050",0
+"ROW",2028,"C050",0
+"ROW",7995,"C050",0
+"ROW",5420,"C050",0
+"ROW",4761,"C050",0
+"ROW",5830,"C050",0
+"ROW",9271,"C050",0
+"ROW",3282,"C054",1
+"ROW",405,"C054",1
+"ROW",5250,"C054",1
+"ROW",1735,"C054",1
+"ROW",1199,"C054",1
+"ROW",138,"C054",1
+"ROW",2574,"C054",1
+"ROW",5802,"C055",1
+"ROW",9492,"C055",1
+"ROW",6670,"C055",1
+"ROW",482,"C055",1
+"ROW",7945,"C055",1
+"ROW",4684,"C055",1
+"ROW",3307,"C055",1
+"ROW",4131,"C056",2
+"ROW",2713,"C056",2
+"ROW",934,"C056",2
+"ROW",9444,"C056",2
+"ROW",603,"C056",2
+"ROW",7266,"C056",2
+"ROW",9204,"C056",2
+"ROW",2811,"C061",1
+"ROW",7427,"C061",1
+"ROW",3909,"C061",1
+"ROW",8679,"C061",1
+"ROW",8897,"C061",1
+"ROW",4086,"C061",1
+"ROW",272,"C066",0
+"ROW",4967,"C066",0
+"ROW",5684,"C066",0
+"ROW",7411,"C066",0
+"ROW",3676,"C066",0
+"ROW",4302,"C066",0
+"ROW",5746,"C066",0
+"ROW",2803,"C067",1
+"ROW",8152,"C067",1
+"ROW",7973,"C067",1
+"ROW",9018,"C067",1
+"ROW",49,"C067",1
+"ROW",8617,"C067",1
+"ROW",5752,"C067",1
+"ROW",5674,"C068",0
+"ROW",2365,"C068",0
+"ROW",851,"C068",0
+"ROW",310,"C068",0
+"ROW",3881,"C068",0
+"ROW",1278,"C068",0
+"ROW",4603,"C068",0
+"ROW",2452,"C076",0
+"ROW",2657,"C076",0
+"ROW",2792,"C076",0
+"ROW",1294,"C076",0
+"ROW",9145,"C076",0
+"ROW",9033,"C078",0
+"ROW",9222,"C078",0
+"ROW",8185,"C078",0
+"ROW",6825,"C078",0
+"ROW",2127,"C078",0
+"ROW",3852,"C078",0
+"ROW",7085,"C078",0
+"ROW",7798,"C083",0
+"ROW",5083,"C083",0
+"ROW",4820,"C083",0
+"ROW",3534,"C083",0
+"SOD",9733,"CW167",0
+"SOD",4787,"CW167",0
+"SOD",2135,"CW167",0
+"SOD",9858,"CW167",0
+"SOD",1902,"CW168",0
+"SOD",8445,"CW168",0
+"SOD",5426,"CW168",0
+"SOD",9302,"CW168",0
+"SOD",9854,"CW173",0
+"SOD",3942,"CW173",0
+"SOD",4852,"CW173",0
+"SOD",3263,"CW173",0
+"SOD",7525,"CW173",0
+"SOD",5134,"CW174",0
+"SOD",8891,"CW170",0
+"SOD",3542,"CW170",0
+"SOD",9106,"CW170",0
+"SOD",4081,"CW170",0
+"SOD",8496,"CW176",0
+"SOD",8059,"CW176",0
+"SOD",5223,"CW176",0
+"SOD",8768,"CW176",0
+"SOD",5477,"CW186",0
+"SOD",2042,"CW186",0
+"SOD",8531,"CW186",0
+"SOD",6045,"CW186",0
+"SOD",9774,"CW184",0
+"SOD",4705,"CW184",0
+"SOD",516,"CW184",0
+"SOD",4736,"CW184",0
+"SOD",6549,"CW184",0
+"SOD",8560,"CW181",0
+"SOD",8924,"CW181",0
+"SOD",4483,"CW180",0
+"SOD",3451,"CW180",0
+"SOD",2790,"CW121",0
+"SOD",8181,"CW121",0
+"SOD",1499,"CW121",0
+"SOD",5333,"CW121",0
+"SOD",9440,"CW121",0
+"SOD",9527,"CW004",1
+"SOD",8822,"CW004",1
+"SOD",3634,"CW004",1
+"WAR",7928,"204",0
+"WAR",1988,"204",0
+"WAR",8684,"274",3
+"WAR",7734,"274",3
+"WAR",8010,"274",3
+"WAR",3833,"300",3
+"WAR",3528,"300",3
+"WAR",9755,"340",1
+"WAR",4878,"340",1
+"WAR",453,"340",1
+"WAR",3090,"530",1
+"WAR",2337,"530",1
+"WAR",7372,"530",1
+"WAR",8554,"530",1
+"WAR",7282,"583",1
+"WAR",8525,"583",1
+"WAR",4995,"583",1
+"WAR",3174,"623",0
+"WAR",2974,"651",0
+"WAR",5940,"651",0
+"WAR",5959,"651",0
+"WAR",2535,"747",0
+"WAR",5243,"747",0
+"WAR",4707,"747",0
+"WAR",5792,"804",1
+"WAR",602,"833",0
+"WAR",9398,"833",0
+"WAR",5481,"848",1
+"WAR",6958,"848",1
+"WAR",1130,"848",1
+"WAR",676,"857",1
+"WAR",5613,"857",1
+"WAR",8743,"857",1
+"WAR",5835,"955",4
+"WAR",5501,"955",4
+"WAR",8602,"955",4
+"WAR",4552,"973",0
+"WAR",9769,"973",0
+"WAR",9427,"973",0
+"WAR",7975,"973",0

+ 107 - 0
input/siblings.csv

@@ -0,0 +1,107 @@
+,corpus,child_id,n_siblings
+0,bergelson,1,1
+1,bergelson,2,1
+2,bergelson,3,1
+3,bergelson,4,4
+4,bergelson,6,0
+5,bergelson,7,0
+6,bergelson,8,0
+7,bergelson,9,0
+8,bergelson,10,1
+9,bergelson,11,0
+10,bergelson,12,0
+11,bergelson,13,2
+12,bergelson,14,1
+13,bergelson,15,2
+14,bergelson,16,0
+15,bergelson,17,1
+16,bergelson,18,1
+17,bergelson,19,0
+18,bergelson,20,0
+19,bergelson,21,0
+20,bergelson,22,0
+21,bergelson,23,4
+22,bergelson,25,0
+23,bergelson,26,1
+24,bergelson,27,0
+25,bergelson,28,0
+26,bergelson,29,0
+27,bergelson,30,0
+28,bergelson,31,1
+29,bergelson,32,2
+30,bergelson,33,0
+31,bergelson,34,0
+32,bergelson,35,0
+33,bergelson,36,3
+34,bergelson,37,1
+35,bergelson,38,1
+36,bergelson,39,2
+37,bergelson,40,2
+38,bergelson,41,1
+39,bergelson,42,1
+40,bergelson,43,0
+41,bergelson,44,3
+42,bergelson,45,1
+43,bergelson,46,0
+44,lucid,C004,0
+45,lucid,C006,1
+46,lucid,C008,2
+47,lucid,C010,1
+48,lucid,C012,0
+49,lucid,C016,1
+50,lucid,C018,1
+51,lucid,C019,0
+52,lucid,C023,2
+53,lucid,C024,0
+54,lucid,C025,0
+55,lucid,C026,2
+56,lucid,C029,0
+57,lucid,C030,1
+58,lucid,C035,0
+59,lucid,C036,2
+60,lucid,C037,0
+61,lucid,C038,0
+62,lucid,C040,0
+63,lucid,C041,2
+64,lucid,C042,0
+65,lucid,C046,1
+66,lucid,C047,0
+67,lucid,C049,1
+68,lucid,C050,0
+69,lucid,C054,1
+70,lucid,C055,1
+71,lucid,C056,2
+72,lucid,C061,1
+73,lucid,C066,0
+74,lucid,C067,1
+75,lucid,C068,0
+76,lucid,C076,0
+77,lucid,C078,0
+78,lucid,C083,0
+79,warlaumont,204,0
+80,warlaumont,274,3
+81,warlaumont,300,3
+82,warlaumont,340,1
+83,warlaumont,530,1
+84,warlaumont,583,1
+85,warlaumont,623,0
+86,warlaumont,651,0
+87,warlaumont,747,0
+88,warlaumont,804,1
+89,warlaumont,833,0
+90,warlaumont,848,1
+91,warlaumont,857,1
+92,warlaumont,955,4
+93,warlaumont,973,0
+94,winnipeg,C004,1
+95,winnipeg,C121,0
+96,winnipeg,C167,0
+97,winnipeg,C168,0
+98,winnipeg,C170,0
+99,winnipeg,C173,0
+100,winnipeg,C174,0
+101,winnipeg,C176,0
+102,winnipeg,C180,0
+103,winnipeg,C181,0
+104,winnipeg,C184,0
+105,winnipeg,C186,0