<?php
/*******************************************************************************
 *
 * LEIDEN OPEN VARIATION DATABASE (LOVD)
 *
 * Created     : 2016-10-04
 * Modified    : 2019-08-08
 * For LOVD    : 3.0-22
 *
 * Copyright   : 2014-2019 Leiden University Medical Center; http://www.LUMC.nl/
 * Programmers : M. Kroon <m.kroon@lumc.nl>
 *               Ivo F.A.C. Fokkema <I.F.A.C.Fokkema@LUMC.nl>
 *
 *
 * This file is part of LOVD.
 *
 * LOVD is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * LOVD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LOVD.  If not, see <http://www.gnu.org/licenses/>.
 *
 *************/

define('ROOT_PATH', '../');
require_once ROOT_PATH . 'inc-init.php';
require_once ROOT_PATH . 'inc-lib-form.php';
require_once ROOT_PATH . 'class/object_transcripts.php';
require_once ROOT_PATH . 'class/progress_bar.php';

38 39 40
// Global for storing warning messages during conversion.
$_WARNINGS = array();

// Array of field names to ignore in input. The list is flipped so that the
// field names become the array keys, allowing a lookup by key.
$aIgnoredFields = array_flip(array('ID_sort_', 'ID_submitterid_'));
43

44 45 46 47 48 49 50 51
// Links between LOVD2-LOVD3 fields, with optional conversion function. Format:
// LOVD2_field => array(LOVD3_section, LOVD3_field, Conversion_function)
// Where LOVD2_field is an LOVD2 field name as it occurs in the input file,
// LOVD3_section is a output section name as defined as key in the
// $aImportSections variable, LOVD3_field is an LOVD3 field name and
// Conversion_function is an optional name of a function taking a LOVD2 field
// value as a string as argument and returning LOVD3 field value as a string.
$aFieldLinks = array(
    // This field maps to either VOT/Published_as or VOG/Published_as (handled later specifically).
    'Variant/DNA_published' =>          array('vot',        'VariantOnTranscript/Published_as'),
    'Variant/DBID' =>                   array('vog',        'VariantOnGenome/DBID',         'lovd_convertDBID'),
    'Variant/Restriction_site' =>       array('vog',        'VariantOnGenome/Restriction_site'),
    'Variant/Remarks' =>                array('vog',        'VariantOnGenome/Remarks'),
    'Variant/Origin' =>                 array('vog',        'VariantOnGenome/Genetic_origin', 'lovd_convertOrigin'),
    'Variant/Genetic_origin' =>         array('vog',        'VariantOnGenome/Genetic_origin', 'lovd_convertOrigin'),
    'Variant/Detection/Technique' =>    array('screening',  'Screening/Technique',          'lovd_convertScrTech'),
    'Variant/Detection_Technique' =>    array('screening',  'Screening/Technique',          'lovd_convertScrTech'),
    'Variant/Reference' =>              array('vog',        'VariantOnGenome/Reference',    'lovd_convertReference'),
    'Variant/Segregation' =>            array('vog',        'VariantOnGenome/Segregation'),
    'Variant/Frequency' =>              array('vog',        'VariantOnGenome/Frequency'),
    // NOTE(review): section 'vog' is paired with a VariantOnTranscript field
    //  here; confirm whether section 'vot' was intended.
    'Variant/Location' =>               array('vog',        'VariantOnTranscript/Domain'),
    'Patient/Patient_ID' =>             array('individual', 'Individual/Lab_ID'),
    'Patient/Reference' =>              array('individual', 'Individual/Reference',         'lovd_convertReference'),
    'Patient/Gender' =>                 array('individual', 'Individual/Gender',            'lovd_convertGender'),
    'Patient/Times_Reported' =>         array('individual', 'panel_size'),
    'Patient/Phenotype_2' =>            array('phenotype',  'Phenotype/Additional'),
    'Patient/Occurrence' =>             array('phenotype',  'Phenotype/Inheritance',        'lovd_convertInheritance'),
    'Patient/Phenotype/Inheritance' =>  array('phenotype',  'Phenotype/Inheritance',        'lovd_convertInheritance'),
    'Patient/Mutation/Origin' =>        array('vog',        'VariantOnGenome/Genetic_origin',   'lovd_convertOrigin'),
    'Patient/Origin/Ethnic' =>          array('individual', 'Individual/Origin/Population'),
    'Patient/Age' =>                    array('phenotype',  'Phenotype/Age'),
    'Patient/Phenotype/Age_exam' =>     array('phenotype',  'Phenotype/Age'),
    'Patient/Phenotype/Age_onset' =>    array('phenotype',  'Phenotype/Age/Onset'),
    'ID_pathogenic_' =>                 array('vog',        'effectid'),
    'ID_status_' =>                     array('vog',        'statusid',                     'lovd_convertStatus'),
    'ID_variant_created_by_' =>         array('vog',        'created_by',                   'lovd_convertCuratorID'),
    'variant_created_date_' =>          array('vog',        'created_date'),
    'ID_variant_edited_by_' =>          array('vog',        'edited_by',                    'lovd_convertEditedByID'),
    'variant_edited_date_' =>           array('vog',        'edited_date'),
    'ID_patient_created_by_' =>         array('individual', 'created_by',                   'lovd_convertCuratorID'),
    'patient_created_date_' =>          array('individual', 'created_date'),
    'ID_patient_edited_by_' =>          array('individual', 'edited_by',                    'lovd_convertEditedByID'),
    'patient_edited_date_' =>           array('individual', 'edited_date'),
    'ID_patientid_' =>                  array('individual', 'id',                           'lovd_autoIncIndividualID'),
    'ID_variantid_' =>                  array('vog',        'id',                           'lovd_autoIncVariantID'),
    'ID_allele_' =>                     array('vog',        'allele'),
    'Patient/Phenotype/Disease' =>      array('disease',    'name'),
);


Ivo Fokkema's avatar
Ivo Fokkema committed
94

95 96 97
// Defaults for prefixed custom column fields not mentioned in $aFieldLinks.
// (e.g. 'Patient' => array('individual', 'Individual') will cause field
// 'Patient/Origin/Population' to be linked to 'Individual/Origin/Population'
// in the individual section). Note that the prefixes higher up in the array
// will be preferred, so be careful to place more generic prefixes at the
// bottom.
$aCustomColLinks = array(
    'Variant/Detection' =>  array('screening', 'Screening'),
    'Variant' =>            array('vot', 'VariantOnTranscript'),
    'Patient/Phenotype' =>  array('phenotype', 'Phenotype'),
    'Patient' =>            array('individual', 'Individual')
);


Ivo Fokkema's avatar
Ivo Fokkema committed
109

110 111 112 113 114 115 116 117
// Output section information describing the LOVD3 import format. Each section
// is defined by a key and one or more settings, where only the 'output_header'
// setting is mandatory. The following settings are available:
// output_header:       Title of section in output.
// customcol_prefix:    Prefix for custom columns in this section.
// mandatory_fields:    Array of mandatory fields as keys, and default values
//                      as values.
// table:               Database table corresponding to the section.
// comments:            Comments to be stored in the import file.
$aImportSections = array(
    'column' =>     array(
        'output_header' =>          'Columns'),
    'gene' =>       array(
        'output_header' =>          'Genes'),
    'transcript' => array(
        'output_header' =>          'Transcripts'),
    'disease' =>    array(
        'output_header' =>          'Diseases',
        'mandatory_fields' =>       array('id' => '1', 'name' => '-', 'symbol' => ''),
        'comments' =>               array('Diseases listed here were not found in the database ' .
                                    '(in either name or symbol field).',
                                    'If this is a mistake, please edit the disease below to reflect the database contents, ' .
                                    'or edit the disease in the database to match this file, ' .
                                    'in order to avoid duplication of diseases in the database.')),
    'g2d' =>        array(
        'output_header' =>          'Genes_To_Diseases'),
    'individual' => array(
        'output_header' =>          'Individuals',
        'customcol_prefix' =>       'Individual',
        'mandatory_fields' =>       array('id' => '0', 'panel_size' => '1', 'statusid' => '',
            'owned_by' => '0', 'created_by' => '0', 'created_date' => '', 'edited_by' => '',
            'edited_date' => '')),
    'i2d' =>        array(
        'output_header' =>          'Individuals_To_Diseases',
        'mandatory_fields' =>       array('individualid' => '0', 'diseaseid' => '0')),
    'phenotype' =>  array(
        'output_header' =>          'Phenotypes',
        'customcol_prefix' =>       'Phenotype',
        'mandatory_fields' =>       array('id' => '0', 'diseaseid' => '0', 'individualid' => '0',
            'statusid' => '', 'owned_by' => '0', 'created_by' => '0', 'created_date' => '',
            'edited_by' => '', 'edited_date' => '', 'Phenotype/Inheritance' => '-')),
    'screening' =>  array(
        'output_header' =>          'Screenings',
        'customcol_prefix' =>       'Screening',
        'mandatory_fields' =>       array('id' => '0', 'individualid' => '0',
            'Screening/Template' => '?', 'Screening/Technique' => '?', 'owned_by' => '0',
            'created_by' => '0', 'created_date' => '', 'edited_by' => '', 'edited_date' => '',
            'variants_found' => '1')),
    's2g' =>        array(
        'output_header' =>          'Screenings_To_Genes',
        'mandatory_fields' =>       array('screeningid' => '0', 'geneid' => '')),
    'vog' =>        array(
        'output_header' =>          'Variants_On_Genome',
        'customcol_prefix' =>       'VariantOnGenome',
        'mandatory_fields' =>       array('id' => '0', 'allele' => '0', 'chromosome' => '0',
            'position_g_start' => '0', 'position_g_end' => '0', 'type' => '?',
            'VariantOnGenome/DNA' => 'g.?', 'statusid' => '', 'owned_by' => '0',
            'created_by' => '0', 'created_date' => '', 'edited_by' => '', 'edited_date' => '')),
    'vot' =>        array(
        'output_header' =>          'Variants_On_Transcripts',
        'customcol_prefix' =>       'VariantOnTranscript',
        'mandatory_fields' =>       array('id' => '0', 'transcriptid' => '0',
            'position_c_start' => '0', 'position_c_start_intron' => '0', 'position_c_end' => '0',
            'position_c_end_intron' => '0', 'VariantOnTranscript/Exon' => '?',
            'VariantOnTranscript/RNA' => 'r.?')),
    's2v' =>        array(
        'output_header' =>          'Screenings_To_Variants',
        'mandatory_fields' =>       array('screeningid' => '0', 'variantid' => '0')),
);


Ivo Fokkema's avatar
Ivo Fokkema committed
181

182
// Default user ID with which to overwrite user IDs in the input file. Used by
// lovd_convertSubmitterID() and lovd_convertCuratorID().
$sFixedSubmitterID = null;
$sFixedCuratorID = null;

// Translation array of LOVD2 user IDs to LOVD3 user IDs. Used by
// lovd_convertSubmitterID() and lovd_convertCuratorID().
$aSubmitterTranslationTable = array();
$aCuratorTranslationTable = array();
191 192


Ivo Fokkema's avatar
Ivo Fokkema committed
193

Ivo Fokkema's avatar
Ivo Fokkema committed
194 195


mkroon's avatar
mkroon committed
196
function lovd_autoIncIndividualID ($LOVD2PatientID)
{
    // ID generator for individuals. Returns the next value of the counter
    // named after this function, as maintained by lovd_getInc().
    // $LOVD2PatientID is not used; the parameter exists so that this function
    // can be listed as a conversion function in $aFieldLinks.
    return lovd_getInc('lovd_autoIncIndividualID');
}





function lovd_autoIncPhenotypeID ()
{
    // Hands out the next phenotype ID, taken from the lovd_getInc() counter
    // that is named after this function.
    $nNextID = lovd_getInc('lovd_autoIncPhenotypeID');
    return $nNextID;
}





mkroon's avatar
mkroon committed
216
function lovd_autoIncVariantID ($LOVD2PatientID)
{
    // ID generator for variants. Returns the next value of the counter named
    // after this function, as maintained by lovd_getInc().
    // The argument is not used; the parameter exists so that this function
    // can be listed as a conversion function in $aFieldLinks.
    return lovd_getInc('lovd_autoIncVariantID');
}





function lovd_autoIncScreeningID ()
{
    // Hands out the next screening ID, taken from the lovd_getInc() counter
    // that is named after this function.
    $nNextID = lovd_getInc('lovd_autoIncScreeningID');
    return $nNextID;
}





Ivo Fokkema's avatar
Ivo Fokkema committed
236
function lovd_callJSONService ($sURL, $bArray = false)
{
    // Call $sURL using lovd_php_file() and return the decoded JSON output.
    // $bArray is passed on to json_decode(); when true, JSON objects are
    //  returned as associative arrays.
    // Returns false when no (non-empty) response was received, otherwise
    //  whatever json_decode() makes of the response.
    // FIXME: Can be replaced by lovd_callMutalyzer().

    $aResponse = lovd_php_file($sURL);
    if (!is_array($aResponse)) {
        // Nothing to join. Checking this explicitly, because join() throws a
        //  TypeError on a non-array as of PHP 8, which '@' does not suppress.
        return false;
    }

    $sResponse = join('', $aResponse);
    if ($sResponse) {
        return json_decode($sResponse, $bArray);
    }
    return false;
}




Ivo Fokkema's avatar
Ivo Fokkema committed
251

mkroon's avatar
mkroon committed
252 253 254 255
function lovd_convertCuratorID ($nLOVD2UserID)
{
    // Returns curator ID for given LOVD2 user ID. Return value is based on
    // settings for fixed (default) user ID and ID translation table, both
    // are defined in the upload form. Returns false if $nLOVD2UserID is
    // equal to the string '0', indicating that the submitterID should be used.
    // Order of preference: translation table, fixed curator ID, and finally
    // the unchanged input (in which case a warning is added to $_WARNINGS).
    global $sFixedCuratorID, $aCuratorTranslationTable, $_WARNINGS;

    // Convert curator ID.
    if ($nLOVD2UserID === '0') {
        // '0' in LOVD2 export means the submitter ID should be used.
        // Return false.
        return false;
    }
    if (ctype_digit($nLOVD2UserID) && isset($aCuratorTranslationTable[(int) $nLOVD2UserID])) {
        // Found match in translation table.
        return $aCuratorTranslationTable[(int) $nLOVD2UserID];
    }
    if (!is_null($sFixedCuratorID)) {
        // Default to fixed user ID.
        return $sFixedCuratorID;
    }
    // Last resort is to return the original ID.
    $_WARNINGS[] = 'Warning: unknown user ID for curator: "' . $nLOVD2UserID . '" (no ' .
        'fixed ID set and ID is not in translation table)';
    return $nLOVD2UserID;
}





Ivo Fokkema's avatar
Ivo Fokkema committed
284
function lovd_convertDBID ($sLOVD2DBID)
{
    // Returns an LOVD3-formatted DBID for the given $sLOVD2DBID by padding
    // the number with an extra '0'.
    // The number is the part after the last underscore. When the input has no
    // underscore, or when that last part does not consist of digits only, the
    // input is returned unchanged.

    $aChunks = explode('_', $sLOVD2DBID);
    $nLast = count($aChunks) - 1;
    if ($nLast > 0 && ctype_digit($aChunks[$nLast])) {
        $aChunks[$nLast] = '0' . $aChunks[$nLast];
        return join('_', $aChunks);
    }
    return $sLOVD2DBID;
}




Ivo Fokkema's avatar
Ivo Fokkema committed
301

302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
function lovd_convertEditedByID ($sLOVD2EditedBy)
{
    // Convert edited_by fields from LOVD2 to LOVD3. This is a wrapper for
    // lovd_convertCuratorID() with the difference that if the input is empty,
    // it will not fall back on the fixed user ID specified in the submission
    // form.
    // Returns the result of lovd_convertCuratorID() for non-empty input, and
    // the unchanged input otherwise.
    if ($sLOVD2EditedBy != '') {
        return lovd_convertCuratorID($sLOVD2EditedBy);
    }
    return $sLOVD2EditedBy;
}


Ivo Fokkema's avatar
Ivo Fokkema committed
317
function lovd_convertGender ($sLOVD2Gender)
{
    // Returns LOVD3 gender value given LOVD2 gender value.
    // 'Female' and 'Male' (compared case-insensitively) are translated to 'F'
    // and 'M', respectively.
    if (strcasecmp($sLOVD2Gender, 'Female') === 0) {
        return 'F';
    } elseif (strcasecmp($sLOVD2Gender, 'Male') === 0) {
        return 'M';
    }
    // Don't lose data. If it's something we don't recognize, just return the
    //  original value.
    return $sLOVD2Gender;
}





Ivo Fokkema's avatar
Ivo Fokkema committed
334
function lovd_convertInheritance ($sLOVD2Occurrence)
{
    // Convert values from LOVD2's 'Patient/Occurrence' to LOVD3's
    // 'Phenotype/Inheritance'.
    // 'Sporadic' (compared case-insensitively) becomes 'Isolated (sporadic)'.
    if (strcasecmp($sLOVD2Occurrence, 'Sporadic') === 0) {
        return 'Isolated (sporadic)';
    }
    // Don't lose data. If it's something we don't recognize, just return the
    //  original value, with its first character in uppercase.
    return ucfirst($sLOVD2Occurrence);
}




349

Ivo Fokkema's avatar
Ivo Fokkema committed
350
function lovd_convertOrigin ($sLOVD2MutationOrigin)
{
    // Convert LOVD2's 'Patient/Mutation/Origin' to LOVD3's
    // 'VariantOnGenome/Genetic_origin'.
    // 'Inherited' (compared case-insensitively) becomes 'Germline'.
    if (strcasecmp($sLOVD2MutationOrigin, 'Inherited') === 0) {
        return 'Germline';
    }
    // Don't lose data. If it's something we don't recognize, just return the
    //  original value, with its first character in uppercase.
    return ucfirst($sLOVD2MutationOrigin);
}





Ivo Fokkema's avatar
Ivo Fokkema committed
366
function lovd_convertReference ($sLOVD2Reference)
{
    // Convert LOVD2-style reference to LOVD3-style. E.g.:
    // {PMID21228398:Bell 2011} => {PMID:Bell 2011:21228398}
    // DOI references are rewritten in the same way.
    // References that are named "PubMed <number> abstract" are given a
    //  readable name ("<first author> et al (<year>)") that is looked up at
    //  NCBI through lovd_callJSONService(). Each lookup result is cached for
    //  the duration of the script. When the lookup fails, the reference is
    //  left with its "PubMed <number> abstract" name.
    static $aRefs = array();

    $sLOVD2Reference = preg_replace('/{PMID(\d+):([^}]+)}/', '{PMID:\\2:\\1}', $sLOVD2Reference);
    $sLOVD2Reference = preg_replace('/{DOI([^:}]+):([^}]+)}/', '{DOI:\\2:\\1}', $sLOVD2Reference);

    // Handle every "PubMed abstract" reference in the field, not just the first.
    if (preg_match_all('/{PMID:PubMed \d+ abstract:(\d+)}/', $sLOVD2Reference, $aMatches, PREG_SET_ORDER)) {
        foreach ($aMatches as $aRegs) {
            list($sMatch, $sPMID) = $aRegs;

            if (!isset($aRefs[$sMatch])) {
                $aPubMedData = lovd_callJSONService('https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&id=' . $sPMID, true);
                // Only build a new name when the entry for this ID actually
                //  has the fields that we need.
                if (!isset($aPubMedData['result'][$sPMID]['sortfirstauthor'], $aPubMedData['result'][$sPMID]['pubdate'])) {
                    continue;
                }
                $aEntry = $aPubMedData['result'][$sPMID];
                // Strip the initials from the author, and take just the first
                //  word of the publication date.
                $aRefs[$sMatch] = '{PMID:' . preg_replace('/ [A-Z]+$/', '', $aEntry['sortfirstauthor']) . ' et al (' .
                    substr($aEntry['pubdate'], 0, strpos($aEntry['pubdate'] . ' ', ' ')) .
                    '):' . $sPMID . '}';
            }

            $sLOVD2Reference = str_replace($sMatch, $aRefs[$sMatch], $sLOVD2Reference);
        }
    }

    return $sLOVD2Reference;
}





397
function lovd_convertScrTech ($sLOVD2ScreeningTechniques)
{
    // Convert LOVD2's 'Variant/Detection/Technique' to LOVD3's
    // 'Screening/Technique'.
    // The input is a semicolon-separated list of techniques; each technique is
    // converted separately and the list is put back together afterwards.

    $aTechniques = array_map(function ($sTechnique) {
        if ($sTechnique == 'mPCR') {
            return 'PCRm';
        }
        // Don't lose data. If it's something we don't recognize, just return the
        //  original value.
        return $sTechnique;
    }, explode(';', $sLOVD2ScreeningTechniques));

    return join(';', $aTechniques);
}



Ivo Fokkema's avatar
Ivo Fokkema committed
416 417


418 419 420 421 422 423 424 425 426 427 428 429 430
function lovd_convertStatus ($nLOVD2Status)
{
    // Translates LOVD2's STATUS_NEW (the value 1) into LOVD3's STATUS_PENDING.
    // Any other value is handed back untouched.
    $bIsNew = (ctype_digit($nLOVD2Status) && (int) $nLOVD2Status === 1);
    return ($bIsNew? STATUS_PENDING : $nLOVD2Status);
}





mkroon's avatar
mkroon committed
431
function lovd_convertSubmitterID ($nLOVD2UserID)
{
    // Returns submitter ID for given LOVD2 user ID. Return value is based on
    // settings for fixed (default) user ID and ID translation table, both
    // are defined in the upload form.
    // Order of preference: translation table, fixed submitter ID, and finally
    // the unchanged input (in which case a warning is added to $_WARNINGS).
    global $sFixedSubmitterID, $aSubmitterTranslationTable, $_WARNINGS;

    // Convert submitter ID.
    if (ctype_digit($nLOVD2UserID) && isset($aSubmitterTranslationTable[(int) $nLOVD2UserID])) {
        // Found match in translation table.
        return $aSubmitterTranslationTable[(int) $nLOVD2UserID];
    }
    if (!is_null($sFixedSubmitterID)) {
        // Default to fixed user ID.
        return $sFixedSubmitterID;
    }
    // Last resort is to return the original ID.
    $_WARNINGS[] = 'Warning: unknown user ID for submitter: "' . $nLOVD2UserID . '" (no ' .
        'fixed ID set and ID is not in translation table)';
    return $nLOVD2UserID;
}





457
function lovd_getDiseaseID ($sDiseaseName)
{
    // Get the ID from the database, searching the name and symbol fields for
    // given disease $sDiseaseName. If it is not present in the database,
    // generate and return an automatic incrementing ID. Displays an error if
    // there are multiple hits in the database.
    // Returns array with disease ID (or false if an error occurred) and a
    // boolean flag stating whether a new disease record for this ID should be
    // created. Returns array(false, false) if multiple matching diseases are
    // found in the DB.
    // IDs are cached per disease name, so the flag is only true the first time
    // that an unknown disease name is passed.
    global $_DB;
    static $aKnownDiseases = array();

    if (isset($aKnownDiseases[$sDiseaseName])) {
        // Seen before; whether it came from the database or was generated
        //  here, no new record needs to be created for it anymore.
        return array($aKnownDiseases[$sDiseaseName], false);
    }

    $qDiseases = $_DB->query('SELECT id FROM ' . TABLE_DISEASES . ' WHERE name = ? OR symbol = ?',
        array($sDiseaseName, $sDiseaseName));
    $zDiseases = $qDiseases->fetchAllAssoc();

    if (!$zDiseases) {
        // Not in database: create new unique disease ID.
        $aKnownDiseases[$sDiseaseName] = lovd_getInc('Diseases');
        return array($aKnownDiseases[$sDiseaseName], true);
    }

    if (count($zDiseases) > 1) {
        // Multiple hits in database.
        lovd_errorAdd('LOVD2_export', 'Error: disease name "' . $sDiseaseName .
            '" is ambiguous, it matches name or symbol for more than one disease in the ' .
            'database.');
        return array(false, false);
    }

    // Exactly one hit in database.
    $aKnownDiseases[$sDiseaseName] = $zDiseases[0]['id'];
    return array($aKnownDiseases[$sDiseaseName], false);
}





function lovd_getHeaders ($aData, $aFieldLinks, $aSections, $aCustomColLinks)
{
    // Parse the file, read the headers, map columns to its LOVD3 counterparts,
    //  add mandatory fields and prepare output columns.
    // $aData is the input file as an array of lines, $aFieldLinks,
    //  $aSections and $aCustomColLinks are in the format of $aFieldLinks,
    //  $aImportSections and $aCustomColLinks as defined at the top of this
    //  file.
    // Returns an array with two array values; the input and output records
    //  respectively. Warning messages are stored in $_WARNINGS. The
    // first array contains the names as defined in the header of the input
    // file. The second array contains per section field names for the output
    // file (the LOVD3 import format).
    // Returns false for both header arrays if header cannot be either found or
    // parsed.

    global $_DB, $_WARNINGS, $aIgnoredFields;

    if (!is_array($aData)) {
        lovd_errorAdd('LOVD2_export', 'Invalid input.');
        return array(false, false);
    }

    // Walk through lines until header is found, then parse the header.
    foreach ($aData as $sLine) {
        $sLine = trim($sLine);
        // Note: square brackets are needed here; the $sLine{0} string offset
        //  syntax no longer exists as of PHP 8.0.
        if (empty($sLine) || $sLine[0] == '#') {
            // Ignore blank lines and comments.
            continue;
        }

        $aMatches = array();
        preg_match_all('/"?{{\s*([^ }]+)\s*}}"?/', $sLine, $aMatches);

        if (empty($aMatches[0]) || empty($aMatches[1])) {
            // Cannot find header in first non-empty, non-comment line in file. Show an error.
            break;
        }

        // Initialize output array and get field names from database per
        // section.
        $aOutputHeaders = array();
        foreach ($aSections as $sSection => $aImportSection) {
            if (isset($aImportSection['customcol_prefix']) &&
                ($aTable = lovd_getTableInfoByCategory($aImportSection['customcol_prefix']))
                !== false) {
                $aSections[$sSection]['db_fields'] =
                    $_DB->query('DESCRIBE ' . $aTable['table_sql'])->fetchAllColumn();
            } else {
                $aSections[$sSection]['db_fields'] = array();
            }
            $aOutputHeaders[$sSection] = array();
        }

        $aSectionIDs = array_keys($aSections);
        // Loop over input headers and link them to output headers, such that
        // $aOutputHeaders[section][i] = outHeader, where section is the output
        // section as defined in $aImportSections, i is the index of the input
        // header and outHeader is the name of the column in the output.
        foreach ($aMatches[1] as $i => $sHeader) {
            // Skip fields listed to ignore.
            if (array_key_exists($sHeader, $aIgnoredFields)) {
                continue;
            }

            // Special consideration for Variant/DNA_published, as it can be linked to two
            // fields: VariantOnTranscript/Published_as and VariantOnGenome/Published_as,
            // but the latter is preferred.
            if ($sHeader == 'Variant/DNA_published') {
                if (!in_array('VariantOnGenome/Published_as', $aSections['vog']['db_fields']) &&
                    in_array('VariantOnTranscript/Published_as', $aSections['vot']['db_fields'])) {
                    // Field available on VOT and not on VOG.
                    $aOutputHeaders['vot'][$i] = 'VariantOnTranscript/Published_as';
                } else {
                    // By default put the published_as field on VOG.
                    $aOutputHeaders['vog'][$i] = 'VariantOnGenome/Published_as';
                }
                continue;
            }

            // Check if field is manually linked in $aFieldLinks.
            if (isset($aFieldLinks[$sHeader])) {
                // Use output header linked in $aFieldLinks.
                list($sSection, $sHeaderOut) = $aFieldLinks[$sHeader];
                $aOutputHeaders[$sSection][$i] = $sHeaderOut;
                continue;
            }

            // Check if header occurs as a literal DB field.
            foreach ($aSectionIDs as $sSection) {
                if (isset($aSections[$sSection]['db_fields'])  &&
                    in_array($sHeader, $aSections[$sSection]['db_fields'])) {
                    $aOutputHeaders[$sSection][$i] = $sHeader;
                    continue 2;
                }
            }

            // Try to link custom columns.
            if (strpos($sHeader, '/') !== false) {
                // Try to find default custom column translation.
                foreach ($aCustomColLinks as $sPrefix => $aCustomColDefault) {
                    if (strpos($sHeader, $sPrefix) === 0) {
                        list($sSection, $sPrefixOut) = $aCustomColDefault;
                        // Replace the leading prefix only; the prefix may
                        //  occur again further on in the field name (e.g.
                        //  'Patient/Patient_x') and should be left alone there.
                        $sHeaderOut = $sPrefixOut . substr($sHeader, strlen($sPrefix));
                        $aOutputHeaders[$sSection][$i] = $sHeaderOut;
                        if (!in_array($sHeaderOut, $aSections[$sSection]['db_fields'])) {
                            $_WARNINGS[] = 'Warning: linked "' . $sHeader . '" to non-existing ' .
                                           'column "' . $sHeaderOut . '" in output section "' .
                                           $aSections[$sSection]['output_header'] . '"';
                        }
                        continue 2;
                    }
                }

                // Try to find a DB field name with corresponding suffix.
                list(, $sFieldname) = explode('/', $sHeader, 2);
                foreach ($aSectionIDs as $sSection) {
                    $aSection = $aSections[$sSection];
                    if (isset($aSection['customcol_prefix'])) {
                        $sNewFieldName = $aSection['customcol_prefix'] . '/' . $sFieldname;
                        if (in_array($sNewFieldName, $aSection['db_fields'])) {
                            // Set output header with new LOVD3 prefix (e.g. Individual).
                            $aOutputHeaders[$sSection][$i] = $sNewFieldName;
                            continue 2;
                        }
                    }
                }
            }

            // Could not link input header intelligently.
            $_WARNINGS[] = 'Warning: could not link field "' . $sHeader . '"';
        }

        // Handle special case effectid: if this field exists for VOG, also
        // add it to VOT.
        if (in_array('effectid', $aOutputHeaders['vog'])) {
            $aOutputHeaders['vot']['effectid'] = 'effectid';
        }

        // Output header post processing.
        foreach ($aSections as $sSection => $aImportSection) {
            // Add mandatory fields. These get their own name as key, which
            //  sets them apart from the (numerically indexed) linked fields.
            if (isset($aImportSection['mandatory_fields'])) {
                $aMandatory = array_diff(array_keys($aImportSection['mandatory_fields']),
                    $aOutputHeaders[$sSection]);
                $aMandatory = array_combine($aMandatory, $aMandatory);
                $aOutputHeaders[$sSection] += $aMandatory;
            }

            // Sort alphabetically, but set 'id' (if present) as first header.
            uasort($aOutputHeaders[$sSection], function ($a, $b) {
                if ($a == 'id') {
                    return -1;
                } elseif ($b == 'id') {
                    return 1;
                }
                return strcasecmp($a, $b);
            });

            // Find if fields are linked more than once in this section. (outer call to
            // array_unique() is needed for when more than 2 inputs link to the same output).
            $aDuplicates = array_unique(array_diff_key($aOutputHeaders[$sSection],
                array_unique($aOutputHeaders[$sSection])));
            foreach ($aDuplicates as $sDupHeader) {
                // We get here when field $sDupHeader appears more than once.
                $aDupKeys = array_keys($aOutputHeaders[$sSection], $sDupHeader);
                $prevKey = null;
                foreach ($aDupKeys as $sKey) {
                    if (!is_null($prevKey)) {
                        // Integer keys refer to an input header, other keys
                        //  are field names themselves.
                        $sPrevField = is_int($prevKey)? $aMatches[1][$prevKey] : $prevKey;
                        $sCurrField = is_int($sKey)? $aMatches[1][$sKey] : $sKey;
                        $_WARNINGS[] = 'Warning: output field ' . $sSection . ':' . $sDupHeader
                                       . ' is linked to both ' . $sPrevField . ' and ' .
                                       $sCurrField . '. Values for ' . $sCurrField . ' may ' .
                                       'get lost when ' . $sPrevField . ' is non-empty';
                    }
                    $prevKey = $sKey;
                }
            }
        }

        return array($aMatches[1], $aOutputHeaders);
    }

    lovd_errorAdd('LOVD2_export', 'Cannot find header in file.');
    return array(false, false);
}





Ivo Fokkema's avatar
Ivo Fokkema committed
688
function lovd_getInc ($sCounterName = 'default')
{
    // Static automatic incrementor. Returns incrementing integers across
    // consecutive function calls (starting at 1). $sCounterName allows one to
    // use multiple incrementors simultaneously; every name has its own,
    // independent sequence. The counters are kept in a static variable, so
    // they persist between calls for the remainder of the script run.
    static $aCounters = array();

    if (!isset($aCounters[$sCounterName])) {
        // First request for this counter name; the increment below makes it 1.
        $aCounters[$sCounterName] = 0;
    }
    return ++$aCounters[$sCounterName];
}




Ivo Fokkema's avatar
Ivo Fokkema committed
708 709

function lovd_getRecordForHeaders ($aOutputHeaders, $aRecord, $aSection = null)
{
    // Given output headers $aOutputHeaders with integer keys linked to fields
    // in the input record $aRecord, generate an array with the fields filled
    // with values for the corresponding links. E.g. given $aOutputHeaders =
    // array(0 => 'field1', 1 => 'field2', 'dummy' => 'field3') and $aRecord =
    // array('v1', 'v2'), this function would return array('field1' => 'v1',
    // 'field2' => 'v2', 'field3' => null).
    // When several input columns link to the same output field, the first
    // non-empty value wins; every later non-empty value is dropped and a
    // message is appended to the global $_WARNINGS.
    // If $aSection has a 'mandatory_fields' array (field name => default), any
    // of those fields that is still unset, null or '' gets its default value.
    global $_WARNINGS;

    $aNewRecord = array();
    foreach ($aOutputHeaders as $nInputIdx => $sHeader) {
        if (is_int($nInputIdx)) {
            // Numeric key $nInputIdx defines link to field in input record $aRecord.
            if (!isset($aNewRecord[$sHeader]) || $aNewRecord[$sHeader] === '') {
                // Nothing usable stored yet for this output field.
                $aNewRecord[$sHeader] = $aRecord[$nInputIdx];
            } elseif ($aRecord[$nInputIdx] !== '') {
                // Output field already filled by an earlier input column.
                $_WARNINGS[] = 'Warning: doubly-linked field already has a value "' .
                               $aNewRecord[$sHeader] . '", alternate value will get lost: "' .
                               $aRecord[$nInputIdx] . '"';
            }
        } else {
            // Leave non-linked fields empty for now. These are probably
            // mandatory fields not provided directly in the input.
            $aNewRecord[$sHeader] = null;
        }
    }

    if (!is_null($aSection) && isset($aSection['mandatory_fields'])) {
        // Set default value for mandatory fields.
        foreach ($aSection['mandatory_fields'] as $sHeader => $sDefault) {
            if (!isset($aNewRecord[$sHeader]) || $aNewRecord[$sHeader] === '') {
                $aNewRecord[$sHeader] = $sDefault;
            }
        }
    }
    return $aNewRecord;
}





Ivo Fokkema's avatar
Ivo Fokkema committed
752
function lovd_getSectionOutput ($aImportSection, $aOutputHeaders, $aRecords)
{
    // Generate LOVD3 import data format from converted LOVD2 records.
    // $aImportSection must have an 'output_header' (section title) and may
    // have a 'comments' array; each comment is written as a "# ..." line.
    // $aOutputHeaders lists the column names, $aRecords is a list of arrays
    // keyed by column name. Returns the section as one string: the section
    // header, a record count, the comments, a tab-separated header line and
    // one tab-separated line per record, followed by a blank line.
    // Values are only wrapped in double quotes, they are not escaped here.

    $sOutput = "\n" . '## ' . $aImportSection['output_header'] .
               ' ## Do not remove or alter this header ##' . "\n" .
               '## Count = ' . strval(count($aRecords)) . "\n";

    if (isset($aImportSection['comments'])) {
        foreach ($aImportSection['comments'] as $sComment) {
            $sOutput .= '# ' . $sComment . "\n";
        }
    }

    // Get output for header line. array_unique() is called because headers
    // will be duplicated when multiple input fields link to the same output.
    $aUniqueHeaders = array_unique($aOutputHeaders);
    $sOutput .= implode("\t", array_map(function ($sHeader) {
        return '"{{' . $sHeader . '}}"';
    }, $aUniqueHeaders)) . "\n";

    foreach ($aRecords as $aRecord) {
        // Put record fields in same order as headers.
        $aOutRecord = array();
        foreach ($aUniqueHeaders as $sHeader) {
            $aOutRecord[] = '"' . $aRecord[$sHeader] . '"';
        }
        $sOutput .= implode("\t", $aOutRecord) . "\n";
    }
    $sOutput .= "\n";
    return $sOutput;
}





Ivo Fokkema's avatar
Ivo Fokkema committed
789
function lovd_openLOVD2ExportFile ($aRequest, $aFiles)
{
    // Returns an array with the contents of the uploaded LOVD2 export file,
    // returns false when something went wrong with opening or decoding the
    // file. In that case an error has been registered through lovd_errorAdd().
    // $aRequest is the submitted form data; its optional 'charset' value is
    // 'auto' (default), 'UTF-8' or 'ISO-8859-1'. $aFiles is the uploaded files
    // array, of which ['LOVD2_export']['tmp_name'] is read.
    // ISO-8859-1 data is passed through utf8_encode_array() before returning.

    $sFile = $aFiles['LOVD2_export']['tmp_name'];

    // Find out the MIME-type of the uploaded file. Sometimes
    // mime_content_type() seems to return False. Don't stop processing if
    // that happens.
    // However, when it does report something different, mention what type was
    // found so we can debug it.
    $sType = '';
    if (function_exists('mime_content_type')) {
        $sType = mime_content_type($sFile);
    }
    if ($sType && substr($sType, 0, 5) != 'text/') {
        // Not all systems report the regular files as "text/plain"; also
        // reported was "text/x-pascal; charset=us-ascii".
        lovd_errorAdd('LOVD2_export', 'The upload file is not a tab-delimited text file and cannot be ' .
            'imported. It seems to be of type "' . htmlspecialchars($sType) . '".');
        return false;
    }

    // Only used to verify that the file can be opened; close the handle right
    // away so it does not stay open for the rest of the request.
    $fInput = @fopen($sFile, 'r');
    if (!$fInput) {
        lovd_errorAdd('LOVD2_export', 'Cannot open file after it was received by the server.');
        return false;
    }
    fclose($fInput);

    // Reading the entire file in memory, because we need to detect the encoding and
    // possibly convert.
    $aData = lovd_php_file($sFile);

    // Fix encoding problems. The encodings that can be selected explicitly
    // are listed here, because the form's option list is not available in
    // this function's scope.
    $aKnownCharSets = array('UTF-8', 'ISO-8859-1');
    $sCharSet = (isset($aRequest['charset'])? $aRequest['charset'] : 'auto');
    if (!in_array($sCharSet, $aKnownCharSets, true)) {
        // Auto detect charset, it's not given (or not one we know).
        // FIXME; Should we ever allow more encodings?
        $sCharSet = mb_detect_encoding(implode("\n", $aData), $aKnownCharSets, true);
        if (!$sCharSet) {
            // Could not be detected.
            lovd_errorAdd('charset', 'Could not autodetect the file\'s character ' .
                'encoding. Please select the character encoding from the list of ' .
                'options.');
            return false;
        }
    }

    if ($sCharSet != 'UTF-8') {
        // Is not UTF-8; convert.
        return utf8_encode_array($aData);
    }
    return $aData;
}





function lovd_parseData ($aData, $zTranscript, $aFieldLinks, $aInputHeaders, $aOutputHeaders,
                         $aSections, $oProgressBar)
{
    // Parse contents of input file. Return output data per section.
    // $aData:          Lines of the LOVD2 export file.
    // $zTranscript:    Transcript data; the keys 'id', 'geneid', 'chromosome'
    //                  and 'id_ncbi' are read.
    // $aFieldLinks:    LOVD2 field name => link definition; when a definition
    //                  has a third element, it's called as conversion function
    //                  on the field's value.
    // $aInputHeaders:  LOVD2 field names, in input column order.
    // $aOutputHeaders: Output headers per section ('vog', 'vot', 'disease',
    //                  'individual', 'screening', 'phenotype', 'i2d').
    // $aSections:      Section definitions, passed on to
    //                  lovd_getRecordForHeaders().
    // $oProgressBar:   Progress bar object, updated for every input line.
    // Returns an array with, in this order, the VOG, VOT, disease, individual,
    // individuals2diseases, screening, screening2gene, screening2variant and
    // phenotype records. Returns null (after calling lovd_errorAdd()) when a
    // line doesn't have as many columns as there are input headers.

    global $_SETT, $_CONF;
    // Free up the session for other requests when parsing the input file.
    session_write_close();
    @set_time_limit(0);

    $nNumLines = count($aData);
    $nNumHeaders = count($aInputHeaders);

    // Positions of the columns we need, looked up once instead of per line.
    // NOTE: array_search() returns false for a missing column; when used as an
    //  array offset, PHP treats false as 0.
    $nVariantIdx = array_search('ID_variantid_', $aInputHeaders);
    $nPatientIdx = array_search('ID_patientid_', $aInputHeaders);
    $nSubmitterIdx = array_search('ID_submitterid_', $aInputHeaders);
    $nStatusIdx = array_search('ID_status_', $aInputHeaders);
    $nHGVSIdx = array_search('Variant/DNA', $aInputHeaders);

    // Base URL of Mutalyzer's JSON interface.
    $sMutalyzerURL = str_replace('/services', '', $_CONF['mutalyzer_soap_url']);

    // Arrays for storing converted data per output section.
    $aVOGRecords = array();
    $aVOTRecords = array();
    $aDiseases = array();
    $aIndividuals = array();
    $aIndividuals2Diseases = array();
    $aScreenings = array();
    $aScreening2Genes = array();
    $aScreening2Variants = array();
    $aPhenotypes = array();

    $nCounter = 0;
    foreach ($aData as $nLine => $sLine) {
        // Set progress bar (leave 1 percent for output generation).
        $oProgressBar->setProgress((++$nCounter / $nNumLines) * 99);
        $oProgressBar->setMessage('Converting record ' . strval($nCounter) . ' of ' .
            strval($nNumLines) . '...');

        // Square brackets are used for the string offset; the curly brace
        // syntax is no longer supported as of PHP 8.0.
        if (empty($sLine) || $sLine[0] == '#' || preg_match('/^"?{{.*/', $sLine)) {
            // Ignore blank lines, comments and the header line.
            continue;
        }

        // Loop over fields in record and convert values according to $aFieldLinks.
        $aInputRecord = explode("\t", $sLine);

        if (count($aInputRecord) != $nNumHeaders) {
            lovd_errorAdd('', 'Incorrect number of columns for record ' . $nLine .
                ' (see below), check if the number of fields corresponds with the number of ' .
                'column headings or if a field contains a tab character?<br>' . $sLine);
            return null;
        }

        $aRecord = array();
        for ($nField = 0; $nField < $nNumHeaders; $nField++) {
            $sFieldName = $aInputHeaders[$nField];
            $sFieldValue = lovd_trimField($aInputRecord[$nField]);
            if (isset($aFieldLinks[$sFieldName])) {
                if (count($aFieldLinks[$sFieldName]) == 2) {
                    $aRecord[] = $sFieldValue;
                } else {
                    $aRecord[] = call_user_func($aFieldLinks[$sFieldName][2], $sFieldValue);
                }
            } else {
                // Copy field as is.
                $aRecord[] = $sFieldValue;
            }
        }

        // Handle multiple observations for single variant in one patient
        // (homozygous).
        $sRecordID = $aInputRecord[$nVariantIdx] . '_' . $aInputRecord[$nPatientIdx];
        // Check if we already know this variant_id/patient_id combination. This
        //  is done to find homozygous variants as they will occur twice in the
        //  LOVD2 export file with identical variant_id/patient_id.
        if (isset($aVOGRecords[$sRecordID])) {
            // Combination variant_id/patient_id already seen, set previous
            // record allele field to 3 (homozygous). Skip this record.
            $aVOGRecords[$sRecordID]['allele'] = 3;
            continue;
        }

        // Get submitter ID.
        $sSubmitterID = null;
        if ($nSubmitterIdx !== false) {
            $sSubmitterID = lovd_convertSubmitterID($aRecord[$nSubmitterIdx]);
        }

        // Create new disease if necessary.
        $aDisease = lovd_getRecordForHeaders($aOutputHeaders['disease'], $aRecord,
            $aSections['disease']);
        if (!empty($aDisease['name'])) {
            list($aDisease['id'], $bCreateNewDisease) = lovd_getDiseaseID($aDisease['name']);
            if ($bCreateNewDisease && $aDisease['id'] !== false) {
                // New disease, create an output record for it.
                $aDiseases[] = $aDisease;
            }
        }

        // Handle individual-specific data (individual, screening, phenotype, etc.).
        if ($nPatientIdx !== false) {
            $sLOVD2IndividualID = $aInputRecord[$nPatientIdx];
            if (!isset($aIndividuals[$sLOVD2IndividualID])) {
                // New individual, create an output record for it.
                $aIndividual = lovd_getRecordForHeaders($aOutputHeaders['individual'], $aRecord,
                    $aSections['individual']);
                if ($aIndividual['created_by'] === false) {
                    // No curator ID was available, set submitter ID.
                    $aIndividual['created_by'] = $sSubmitterID;
                }
                if ($aIndividual['edited_by'] === false) {
                    // No curator ID was available, set submitter ID.
                    $aIndividual['edited_by'] = $sSubmitterID;
                }
                // Use the translated submitter ID as owner, fall back to value in created_by.
                $aIndividual['owned_by'] = $sSubmitterID != ''? $sSubmitterID :
                    $aIndividual['created_by'];
                if ($nStatusIdx !== false) {
                    $aIndividual['statusid'] = $aRecord[$nStatusIdx];
                }
                $aIndividuals[$sLOVD2IndividualID] = $aIndividual;

                // Create screening record.
                $aScreening = lovd_getRecordForHeaders($aOutputHeaders['screening'], $aRecord,
                    $aSections['screening']);
                $nScreeningID = lovd_autoIncScreeningID();
                $aScreening['id'] = $nScreeningID;
                $aScreening['individualid'] = $aIndividual['id'];
                $aScreening['owned_by'] = $aIndividual['owned_by'];
                $aScreening['created_by'] = $aIndividual['created_by'];
                $aScreening['created_date'] = $aIndividual['created_date'];
                $aScreening['edited_by'] = $aIndividual['edited_by'];
                $aScreening['edited_date'] = $aIndividual['edited_date'];
                $aScreenings[$sLOVD2IndividualID] = $aScreening;

                // Create screening2gene record.
                $aScreening2Genes[] = array('screeningid' => $nScreeningID,
                                            'geneid' => $zTranscript['geneid']);

                // Create phenotype record.
                $aPhenotype = lovd_getRecordForHeaders($aOutputHeaders['phenotype'], $aRecord,
                    $aSections['phenotype']);
                // Skip phenotype when there is no data in phenotype record except for ID fields.
                $bEmptyPhenotype = true;
                foreach ($aPhenotype as $sField => $sValue) {
                    if ($sValue !== '' &&
                        (!key_exists($sField, $aSections['phenotype']['mandatory_fields']) ||
                         $sValue !== $aSections['phenotype']['mandatory_fields'][$sField])) {
                        $bEmptyPhenotype = false;
                        break;
                    }
                }
                if (!$bEmptyPhenotype) {
                    $aPhenotype['id'] = lovd_autoIncPhenotypeID();
                    $aPhenotype['diseaseid'] = $aDisease['id'];
                    $aPhenotype['individualid'] = $aIndividual['id'];
                    $aPhenotype['statusid'] = $aIndividual['statusid'];
                    $aPhenotype['owned_by'] = $aIndividual['owned_by'];
                    $aPhenotype['created_by'] = $aIndividual['created_by'];
                    $aPhenotype['created_date'] = $aIndividual['created_date'];
                    $aPhenotype['edited_by'] = $aIndividual['edited_by'];
                    $aPhenotype['edited_date'] = $aIndividual['edited_date'];
                    $aPhenotypes[$sLOVD2IndividualID] = $aPhenotype;
                }

                // Create individuals2diseases record.
                $aIndividuals2Disease = lovd_getRecordForHeaders($aOutputHeaders['i2d'], $aRecord);
                $aIndividuals2Disease['individualid'] = $aIndividual['id'];
                $aIndividuals2Disease['diseaseid'] = $aDisease['id'];
                $aIndividuals2Diseases[] = $aIndividuals2Disease;
            }

            // Create screening2variant record.
            $aScreening2Variants[] = array(
                'screeningid' => $aScreenings[$sLOVD2IndividualID]['id'],
                'variantid' => $aRecord[$nVariantIdx]);
        }

        // Create VOG/VOT records.
        $aVOGRecord = lovd_getRecordForHeaders($aOutputHeaders['vog'], $aRecord,
            $aSections['vog']);
        $aVOGRecord['chromosome'] = $zTranscript['chromosome'];
        if ($aVOGRecord['created_by'] === false) {
            // No curator ID was available, set submitter ID.
            $aVOGRecord['created_by'] = $sSubmitterID;
        }
        if ($aVOGRecord['edited_by'] === false) {
            // No curator ID was available, set submitter ID.
            $aVOGRecord['edited_by'] = $sSubmitterID;
        }
        // Use the translated submitter ID as owner, fall back to value in created_by.
        $aVOGRecord['owned_by'] = $sSubmitterID != ''? $sSubmitterID : $aVOGRecord['created_by'];

        $aVOTRecord = lovd_getRecordForHeaders($aOutputHeaders['vot'], $aRecord,
            $aSections['vot']);
        $aVOTRecord['id'] = $aVOGRecord['id'];
        if (isset($aVOGRecord['effectid'])) {
            $aVOTRecord['effectid'] = $aVOGRecord['effectid'];
        }
        $aVOTRecord['transcriptid'] = $zTranscript['id'];

        // Get positions on transcript/chromosome from mutalyzer for variant.
        // FIXME: Using the new lovd_getVariantInfo() function, you can skip
        //  this webservice call.
        $sVariant = $aRecord[$nHGVSIdx];
        $aMappingInfoArgs = array(
            'LOVD_ver' => $_SETT['system']['version'],
            'build' => $_CONF['refseq_build'],
            'accNo' => $zTranscript['id_ncbi'],
            'variant' => $sVariant);
        $sMappingURL = $sMutalyzerURL . '/json/mappingInfo?' .
                       http_build_query($aMappingInfoArgs);
        // FIXME: Can be replaced by lovd_callMutalyzer().
        $oResponse = lovd_callJSONService($sMappingURL);
        if ($oResponse && !isset($oResponse->errorcode) && !isset($oResponse->faultcode)) {
            $aVOGRecord['position_g_start'] =   min($oResponse->start_g, $oResponse->end_g);
            $aVOGRecord['position_g_end'] =     max($oResponse->start_g, $oResponse->end_g);
            $aVOGRecord['type'] =               $oResponse->mutationType;
            $aVOTRecord['position_c_start'] =   $oResponse->startmain;
            $aVOTRecord['position_c_start_intron'] =    $oResponse->startoffset;
            $aVOTRecord['position_c_end'] =             $oResponse->endmain;
            $aVOTRecord['position_c_end_intron'] =      $oResponse->endoffset;
        }

        // Call mutalyzer's numberConversion to get VariantOnGenome/DNA.
        $aNumberConvArgs = array(
            'build' => $_CONF['refseq_build'],
            'gene' => $zTranscript['geneid'],
            'variant' => $zTranscript['id_ncbi'] . ':' . $sVariant);
        $sNumberConvURL = $sMutalyzerURL . '/json/numberConversion?' .
                          http_build_query($aNumberConvArgs);
        // FIXME: Can be replaced by lovd_callMutalyzer().
        $oResponse = lovd_callJSONService($sNumberConvURL);
        // Only a non-empty list of strings is usable here. An error response
        // (an object) must not be passed to count() or indexed as an array,
        // that's fatal on PHP 8.
        if (is_array($oResponse) && !empty($oResponse[0]) && is_string($oResponse[0])) {
            // First element is expected to look like "<reference>:<variant>".
            $aResponseFields = explode(':', $oResponse[0], 2);
            if (isset($aResponseFields[1])) {
                $aVOGRecord['VariantOnGenome/DNA'] = $aResponseFields[1];
            }
        }

        $aVOGRecords[$sRecordID] = $aVOGRecord;
        $aVOTRecords[$sRecordID] = $aVOTRecord;
    }
    return array(
        $aVOGRecords,
        $aVOTRecords,
        $aDiseases,
        $aIndividuals,
        $aIndividuals2Diseases,
        $aScreenings,
        $aScreening2Genes,
        $aScreening2Variants,
        $aPhenotypes);
}





mkroon's avatar
mkroon committed
1101 1102
function lovd_setUserIDSettings ($sFixedSubmitterIDInput, $sSubmitterTranslationTableInput,
                                 $sFixedCuratorIDInput, $sCuratorTranslationTableInput)
{
    // Validate form settings for handling user IDs. Calls lovd_errorAdd() when
    // something went wrong. User ID settings are interpreted as follows:
    // $sFixedSubmitterIDInput:          Default submitter ID, should be integer
    //                                   referring to existing user.
    // $sSubmitterTranslationTableInput: Textual translation table, where every line
    //                                   contains two integers separated by
    //                                   whitespace. The first int is the LOVD2 submitter
    //                                   ID value to translate, the second int is
    //                                   the LOVD3 user ID value to which is being
    //                                   translated.
    // $sFixedCuratorIDInput:          Default user ID, should be integer
    //                                 referring to existing user.
    // $sCuratorTranslationTableInput: Textual translation table, where every line
    //                                 contains two integers separated by
    //                                 whitespace. The first int is the LOVD2 user
    //                                 ID value to translate, the second int is
    //                                 the LOVD3 user ID value to which is being
    //                                 translated.
    // Valid settings are stored in the globals $sFixedSubmitterID,
    // $sFixedCuratorID, $aSubmitterTranslationTable and
    // $aCuratorTranslationTable (the tables are keyed by the integer LOVD2 ID).
    // This function checks the format only; it doesn't verify that the IDs
    // belong to existing users. Returns nothing.

    $aFixedUserInfos = array(
        array('submitter', 'submitterid_fixed', 'sFixedSubmitterID', $sFixedSubmitterIDInput),
        array('curator', 'curatorid_fixed', 'sFixedCuratorID', $sFixedCuratorIDInput),
    );

    foreach ($aFixedUserInfos as $aFixedUserInfo) {
        list($sIDtype, $sFormField, $sGlobalVar, $sInput) = $aFixedUserInfo;
        // ctype_digit() only behaves as expected on strings, so make sure
        // integers and missing (null) values are handled correctly, too.
        $sInput = (string) $sInput;
        if (ctype_digit($sInput)) {
            $GLOBALS[$sGlobalVar] = $sInput;
        } elseif ($sInput !== '') {
            lovd_errorAdd($sFormField, 'Error: Fixed ' . $sIDtype . ' ID must be numeric.');
        }
    }

    $aTranslationInfos = array(
        array('submitter', 'submitterid_translation', 'aSubmitterTranslationTable',
            $sSubmitterTranslationTableInput),
        array('curator', 'curatorid_translation', 'aCuratorTranslationTable',
            $sCuratorTranslationTableInput),
    );

    foreach ($aTranslationInfos as $aTranslationInfo) {
        list($sIDtype, $sFormField, $sGlobalVar, $sInput) = $aTranslationInfo;
        foreach (explode("\n", (string) $sInput) as $sLine) {
            $sLineClean = trim($sLine);
            if ($sLineClean === '') {
                // Skip blank lines. Compared with '' on purpose; empty() would
                // also silently drop a line that consists of just "0".
                continue;
            }

            // Parse line as text output from MySQL CLI client. E.g.:
            // | 000123 | 000234 |
            if (preg_match('/^[\s|]*(\d+)[\s|]+(\d+)[\s|]*$/', $sLineClean, $m) !== 1) {
                // Line not parsable as translation; report it and go on with
                // the next line.
                lovd_errorAdd($sFormField, 'Error: ' . $sIDtype . ' ID translation table contains unparsable line: ' . htmlspecialchars($sLine) . '.');
                continue;
            }
            // The key is cast to int, so leading zeros in the LOVD2 ID are
            // dropped. The LOVD3 ID is stored as entered.
            $GLOBALS[$sGlobalVar][intval($m[1])] = $m[2];
        }
    }
}





function lovd_showConversionForm ($nMaxSizeLOVD, $nMaxSize)
{
    // Print HTML for the form specifying input to be converted.
    // $nMaxSizeLOVD is LOVD's own file size limit, $nMaxSize the limit that is
    // actually in effect; both are in bytes (they're divided by 1024*1024 to
    // be shown in MB). When the two differ, the note below the file field
    // mentions that the server restricts the upload size.
    // Returns nothing.

    // Show viewlist for searching and selecting a transcript.
    print('<H2>Select transcript</H2>');
    $_DATA = new LOVD_Transcript();
    // Clicking a row copies that transcript's ID into the form field below.
    $_DATA->setRowLink('Transcripts', 'javascript: $("input[name=\'transcriptid\']").val("{{ID}}"); return false;');
    $_GET['page_size'] = 10;
    $_DATA->viewList('Transcripts_convert_LOVD2', array('cols_to_skip' => array('id_', 'variants')));

    print('      <FORM action="' . CURRENT_PATH . '?' . ACTION .
        '" method="post" enctype="multipart/form-data">' . "\n");
    lovd_errorPrint();

    // Options for the character encoding selection list.
    $aCharSets = array(
        'auto' => 'Autodetect',
        'UTF-8' => 'UTF-8 / Unicode',
        'ISO-8859-1' => 'ISO-8859-1 / Latin-1');

    // Array which will make up the form table.
    $aForm = array(
        array('POST', '', '', '', '35%', '14', '65%'),
        array('Transcript ID (click in table above)', 'Transcript to which generated import data' .
            ' will be linked.', 'text', 'transcriptid', 10),
        array('', '', 'note', 'Click the transcript in the table above to copy its ID here.'),
        'skip',
        array('Select LOVD2 export file to convert', '', 'file', 'LOVD2_export', 50),
        array('', 'Current file size limits:<BR>LOVD: ' . ($nMaxSizeLOVD/(1024*1024)) .
            'M<BR>PHP (upload_max_filesize): ' . ini_get('upload_max_filesize') .
            '<BR>PHP (post_max_size): ' .
            ini_get('post_max_size'), 'note', 'The maximum file size accepted is ' .
            round($nMaxSize/pow(1024, 2), 1) . ' MB' . ($nMaxSize == $nMaxSizeLOVD? '' :
                ', due to restrictions on this server. If you wish to have it increased, contact' .
                ' the server\'s system administrator') . '.'),
        array('Character encoding of imported file', 'If your file contains special characters ' .
            'like &egrave;, &ouml; or even just fancy quotes like &ldquo; or &rdquo;, LOVD needs ' .
            'to know the file\'s character encoding to ensure the correct display of the data.',
            'select', 'charset', 1, $aCharSets, false, false, false),
        array('', '', 'note', 'Please only change this setting in case you encounter problems ' .
            'with displaying special characters in imported data. Technical information about ' .
            'character encoding can be found <A ' .
            'href="http://en.wikipedia.org/wiki/Character_encoding" target="_blank">on Wikipedia' .
            '</A>.'),
        'skip',
        array('', '', 'print', 'User IDs in the selected LOVD2 export file are usually different' .
            ' from those in the LOVD3 application. Below one can define a fixed user ID to ' .
            'set all user IDs in the file to a single value. One can also specify a translation ' .
            'between LOVD2 and LOVD3 IDs. The general strategy for determining the value of user ' .
            'ID fields in the output (created_by, owned_by, edited_by) is as following:<br>' .
            '1. Non-zero value translated by "Curator ID translation table".<br>' .
            '2. Non-zero value replaced by given value in "Fixed curator ID".<br>' .
            '3. Untouched value from corresponding input field.<br>' .
            '4. Zero-valued fields ("0") are treated similarly as in steps 1-3, but using the ' .
            '"Submitter ID translation table" and "Fixed submitter ID" form fields instead.<br>' .
            'Exceptions to above steps are that the "edited_by" fields will not default to the ' .
            'fixed curator ID field value when empty, and that the "owned_by" field is first ' .
            'tried to be filled with the submitter ID and will default to whatever value was ' .
            'determined for the "created_by" field.'),
        array('Fixed submitter ID', '', 'text',
            'submitterid_fixed', 10),
        array('', '', 'note', 'Fixed value for submitter ID (field "ID_submitterid_" in ' .
            'import file). All fields in output that represent a user ID and are zero "0" are ' .
            'given this fixed value instead, if there is no corresponding value in the ' .
            'translation table below.'),
        array('Submitter ID translation table', '', 'textarea', 'submitterid_translation', 20, 6),
        array('', '', 'note', 'Translation table for user IDs. On every line an LOVD2 user ID ' .
            'is expected, followed by an LOVD3 user ID, separated by whitespace. Any fields in ' .
            'the output file will be given a value based on this table, before defaulting to the ' .
            'fixed submitter ID defined above or the value of "ID_submitterid_" in the input ' .
            'file.'),
        array('Fixed curator ID', '', 'text', 'curatorid_fixed', 10),
        array('', '', 'note', 'Fixed value for non-zero user ID fields. All fields in the output ' .
            'that represent a non-submitter user ID will be given this value if they cannot be ' .
            'translated with the table defined below.'),
        array('Curator ID translation table', '', 'textarea', 'curatorid_translation', 20, 6),
        array('', '', 'note', 'Translation table for curator user IDs. On every line an LOVD2 user ID ' .
            'is expected, followed by an LOVD3 user ID, separated by whitespace. Any fields in ' .
            'the output file will be given a value based on this table, before defaulting to the ' .
            'fixed curator ID defined above.'),
        'hr',
        array('', '', 'submit', 'Generate LOVD3 import file'),
    );
    lovd_viewForm($aForm);

    print('</FORM>' . "\n\n");
}





Ivo Fokkema's avatar
Ivo Fokkema committed
1260

1261
function lovd_validateConversionForm ($zTranscript, $nMaxSize, $nMaxSizeLOVD)
1262 1263 1264 1265
{
    // Validate fields submitted by form generated in lovd_showConversionForm().
    // Returns true if there were no errors.

1266
    if (empty($_POST['transcriptid'])) {
1267 1268 1269 1270 1271
        lovd_errorAdd('transcriptid', 'Error: No transcript selected.');
    } elseif (empty($zTranscript)) {
        lovd_errorAdd('transcriptid', 'Error: Unknown transcript.');
    }

1272 1273
    if (empty($_FILES['LOVD2_export']) || ($_FILES['LOVD2_export']['error'] > 0 &&
            $_FILES['LOVD2_export']['error'] < 4)) {
1274 1275 1276 1277
        lovd_errorAdd('LOVD2_export', 'There was a problem with the file transfer. Please try ' .
            'again. The file cannot be larger than ' . round($nMaxSize/pow(1024, 2), 1) . ' MB' .
            ($nMaxSize == $nMaxSizeLOVD? '' : ', due to restrictions on this server') . '.');

1278
    } elseif ($_FILES['LOVD2_export']['error'] == 4 || !$_FILES['LOVD2_export']['size']) {
1279