Skip to content

ingest.py

ingest_lab(lab_csv_path='./user_data/lab/labs.csv', project_csv_path='./user_data/lab/projects.csv', publication_csv_path='./user_data/lab/publications.csv', keyword_csv_path='./user_data/lab/keywords.csv', protocol_csv_path='./user_data/lab/protocols.csv', users_csv_path='./user_data/lab/users.csv', project_user_csv_path='./user_data/lab/project_users.csv', sources_csv_path='./user_data/lab/sources.csv', skip_duplicates=True, verbose=True)

Insert data from CSVs into their corresponding lab schema tables.

By default, uses data from workflow_session/user_data/lab/

Parameters:

Name Type Description Default
lab_csv_path str

relative path of lab csv

'./user_data/lab/labs.csv'
project_csv_path str

relative path of project csv

'./user_data/lab/projects.csv'
publication_csv_path str

relative path of publication csv

'./user_data/lab/publications.csv'
keyword_csv_path str

relative path of keyword csv

'./user_data/lab/keywords.csv'
protocol_csv_path str

relative path of protocol csv

'./user_data/lab/protocols.csv'
users_csv_path str

relative path of users csv

'./user_data/lab/users.csv'
project_user_csv_path str

relative path of project users csv

'./user_data/lab/project_users.csv'
sources_csv_path str

relative path of sources csv

'./user_data/lab/sources.csv'
skip_duplicates bool

Default True. See DataJoint insert function

True
verbose bool

Print number inserted (i.e., table length change)

True
Source code in workflow_session/ingest.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def ingest_lab(
    lab_csv_path: str = "./user_data/lab/labs.csv",
    project_csv_path: str = "./user_data/lab/projects.csv",
    publication_csv_path: str = "./user_data/lab/publications.csv",
    keyword_csv_path: str = "./user_data/lab/keywords.csv",
    protocol_csv_path: str = "./user_data/lab/protocols.csv",
    users_csv_path: str = "./user_data/lab/users.csv",
    project_user_csv_path: str = "./user_data/lab/project_users.csv",
    sources_csv_path: str = "./user_data/lab/sources.csv",
    skip_duplicates: bool = True,
    verbose: bool = True,
):
    """Ingest the lab CSV files into their corresponding lab schema tables.

    Default paths point at workflow_session/user_data/lab/.

    Args:
        lab_csv_path (str): relative path of lab csv
        project_csv_path (str): relative path of project csv
        publication_csv_path (str): relative path of publication csv
        keyword_csv_path (str): relative path of keyword csv
        protocol_csv_path (str): relative path of protocol csv
        users_csv_path (str): relative path of users csv
        project_user_csv_path (str): relative path of project users csv
        sources_csv_path (str): relative path of sources csv
        skip_duplicates (bool): Default True. Forwarded to `ingest_csv_to_table`;
            see DataJoint `insert` function
        verbose (bool): Default True. Forwarded to `ingest_csv_to_table`;
            print number inserted (i.e., table length change)
    """
    # One (csv path, table) pair per ingestion step, in ingestion order.
    # A path appears more than once when the same CSV feeds several tables.
    csv_table_pairs = [
        (lab_csv_path, lab.Lab()),
        (lab_csv_path, lab.Location()),
        (project_csv_path, lab.Project()),
        (project_csv_path, lab.ProjectSourceCode()),
        (publication_csv_path, lab.ProjectPublication()),
        (keyword_csv_path, lab.ProjectKeywords()),
        (protocol_csv_path, lab.ProtocolType()),
        (protocol_csv_path, lab.Protocol()),
        (users_csv_path, lab.UserRole()),
        (users_csv_path, lab.User()),
        (users_csv_path, lab.LabMembership()),
        (project_user_csv_path, lab.ProjectUser()),
        (sources_csv_path, lab.Source()),
    ]

    # ingest_csv_to_table takes two parallel lists, so split the pairs back out.
    csvs = [csv_path for csv_path, _ in csv_table_pairs]
    tables = [table for _, table in csv_table_pairs]

    ingest_csv_to_table(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose)

ingest_subjects(subject_csv_path='./user_data/subject/subjects.csv', subject_part_csv_path='./user_data/subject/subjects_part.csv', allele_csv_path='./user_data/subject/allele.csv', cage_csv_path='./user_data/subject/cage.csv', breedingpair_csv_path='./user_data/subject/breedingpair.csv', genotype_test_csv_path='./user_data/subject/genotype_test.csv', line_csv_path='./user_data/subject/line.csv', strain_csv_path='./user_data/subject/strain.csv', zygosity_csv_path='./user_data/subject/zygosity.csv', skip_duplicates=True, verbose=True)

Insert data from a subject csv into corresponding subject schema tables

By default, uses data from workflow_session/user_data/subject/

Parameters:

Name Type Description Default
subject_csv_path str

relative path of csv for subject data

'./user_data/subject/subjects.csv'
subject_part_csv_path str

relative path of csv for subject part tables

'./user_data/subject/subjects_part.csv'
allele_csv_path str

relative path of csv for alleles

'./user_data/subject/allele.csv'
cage_csv_path str

relative path of csv for cages

'./user_data/subject/cage.csv'
breedingpair_csv_path str

relative path of csv for breeding pairs

'./user_data/subject/breedingpair.csv'
genotype_test_csv_path str

relative path of csv for genotype

'./user_data/subject/genotype_test.csv'
line_csv_path str

relative path of csv for line

'./user_data/subject/line.csv'
strain_csv_path str

relative path of csv for strain

'./user_data/subject/strain.csv'
zygosity_csv_path str

relative path of csv for zygosity

'./user_data/subject/zygosity.csv'
skip_duplicates bool

Default True. See DataJoint insert function

True
verbose bool

Print number inserted (i.e., table length change)

True
Source code in workflow_session/ingest.py
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
def ingest_subjects(
    subject_csv_path: str = "./user_data/subject/subjects.csv",
    subject_part_csv_path: str = "./user_data/subject/subjects_part.csv",
    allele_csv_path: str = "./user_data/subject/allele.csv",
    cage_csv_path: str = "./user_data/subject/cage.csv",
    breedingpair_csv_path: str = "./user_data/subject/breedingpair.csv",
    genotype_test_csv_path: str = "./user_data/subject/genotype_test.csv",
    line_csv_path: str = "./user_data/subject/line.csv",
    strain_csv_path: str = "./user_data/subject/strain.csv",
    zygosity_csv_path: str = "./user_data/subject/zygosity.csv",
    skip_duplicates: bool = True,
    verbose: bool = True,
):
    """Ingest the subject CSV files into the subject and genotyping tables.

    Default paths point at workflow_session/user_data/subject/.

    Args:
        subject_csv_path (str): relative path of csv for subject data
        subject_part_csv_path (str): relative path of csv for subject part tables
        allele_csv_path (str): relative path of csv for alleles
        cage_csv_path (str): relative path of csv for cages
        breedingpair_csv_path (str): relative path of csv for breeding pairs
        genotype_test_csv_path (str): relative path of csv for genotype
        line_csv_path (str): relative path of csv for line
        strain_csv_path (str): relative path of csv for strain
        zygosity_csv_path (str): relative path of csv for zygosity
        skip_duplicates (bool): Default True. Forwarded to `ingest_csv_to_table`;
            see DataJoint `insert` function
        verbose (bool): Default True. Forwarded to `ingest_csv_to_table`;
            print number inserted (i.e., table length change)
    """
    # One (csv path, table) pair per ingestion step, in ingestion order.
    # A path appears more than once when the same CSV feeds several tables.
    csv_table_pairs = [
        (subject_csv_path, subject.Subject()),
        (subject_csv_path, subject.SubjectDeath()),
        (subject_csv_path, subject.SubjectCullMethod()),
        (subject_part_csv_path, subject.Subject.Protocol()),
        (subject_part_csv_path, subject.Subject.User()),
        (subject_part_csv_path, subject.Subject.Lab()),
        (strain_csv_path, subject.Strain()),
        (allele_csv_path, subject.Allele()),
        (allele_csv_path, subject.Allele.Source()),
        (allele_csv_path, genotyping.Sequence()),
        (allele_csv_path, genotyping.AlleleSequence()),
        (line_csv_path, subject.Line()),
        (line_csv_path, subject.Line.Allele()),
        (subject_part_csv_path, subject.Subject.Line()),
        (subject_part_csv_path, subject.Subject.Strain()),
        (subject_part_csv_path, subject.Subject.Source()),
        (zygosity_csv_path, subject.Zygosity()),
        (breedingpair_csv_path, genotyping.BreedingPair()),
        (breedingpair_csv_path, genotyping.BreedingPair.Father()),
        (breedingpair_csv_path, genotyping.BreedingPair.Mother()),
        (breedingpair_csv_path, genotyping.Litter()),
        (breedingpair_csv_path, genotyping.Weaning()),
        (breedingpair_csv_path, genotyping.SubjectLitter()),
        (cage_csv_path, genotyping.Cage()),
        (cage_csv_path, genotyping.SubjectCaging()),
        (genotype_test_csv_path, genotyping.GenotypeTest()),
    ]

    # ingest_csv_to_table takes two parallel lists, so split the pairs back out.
    csvs = [csv_path for csv_path, _ in csv_table_pairs]
    tables = [table for _, table in csv_table_pairs]

    ingest_csv_to_table(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose)

ingest_sessions(session_csv_path='./user_data/session/sessions.csv', skip_duplicates=True, verbose=True)

Inserts data from a sessions csv into corresponding session schema tables.

By default, uses data from workflow_session/user_data/session/

Parameters:

session_csv_path (str): relative path of session csv

skip_duplicates (bool): Default True. See DataJoint insert function

verbose (bool): Print number inserted (i.e., table length change)

Source code in workflow_session/ingest.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def ingest_sessions(
    session_csv_path: str = "./user_data/session/sessions.csv",
    skip_duplicates: bool = True,
    verbose: bool = True,
):
    """Ingest the sessions CSV file into the session schema tables.

    Default path points at workflow_session/user_data/session/.

    Args:
        session_csv_path (str): relative path of session csv
        skip_duplicates (bool): Default True. Forwarded to `ingest_csv_to_table`;
            see DataJoint `insert` function
        verbose (bool): Default True. Forwarded to `ingest_csv_to_table`;
            print number inserted (i.e., table length change)
    """
    # Every session table below is fed from the same CSV.
    tables = [
        session.Session(),
        session.SessionDirectory(),
        session.SessionNote(),
        session.ProjectSession(),
        session.SessionExperimenter(),
    ]
    # ingest_csv_to_table takes parallel lists: repeat the path once per table.
    csvs = [session_csv_path] * len(tables)

    ingest_csv_to_table(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose)