GGBN specific configuration

From GGBN Portal Software
Jump to: navigation, search

Database

Specific views

  • Several views are required for the counts-box on start page.
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `counts` AS
    select 
        count(`ro`.`occurrenceid`) AS `counts`, 'DNA' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'DNA') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`, 'Tissues' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'tissue') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`, 'Cultures' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'Culture') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`,
        'eVouchers' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'eVoucher') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`,
        'Specimens' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'specimen') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`, 'Unknown' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'unknown') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`, 'Enviros' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'environmental sample') 
    union select 
        count(`grouped_fullscientificname`.`fullscientificname`) AS `counts`,
        'Taxa' AS `kind`
    from
        `ggbn_index`.`grouped_fullscientificname` 
    union select 
        count(distinct `families_view`.`family`) AS `counts`,
        'Families' AS `kind`
    from
        `ggbn_index`.`families_view` 
    union select 
        count(distinct `genera_view`.`genus`) AS `counts`,
        'Genera' AS `kind`
    from
        `ggbn_index`.`genera_view` 
    union select 
        count(distinct `species_view`.`species`) AS `counts`,
        'Species' AS `kind`
    from
        `ggbn_index`.`species_view`
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `families_view` AS
    select distinct
        `f`.`family` AS `family`
    from
        ((`backbone`.`family` `f`
        join `backbone`.`name` `n` ON ((`f`.`familykey` = `n`.`familyKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`gbifKey` = `n`.`acceptedKey`))) 
    union select distinct
        `f`.`family` AS `family`
    from
        ((`backbone_col`.`family` `f`
        join `backbone_col`.`name` `n` ON ((`f`.`familykey` = `n`.`familyKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`colKey` = `n`.`acceptedKey`))) 
    union select distinct
        `f`.`family` AS `family`
    from
        ((`backbone_ncbi`.`family` `f`
        join `backbone_ncbi`.`name` `n` ON ((`f`.`familykey` = `n`.`familyKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`ncbiKey` = `n`.`acceptedKey`)))
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `genera_view` AS
    select distinct
        `f`.`genus` AS `genus`
    from
        ((`backbone`.`genus` `f`
        join `backbone`.`name` `n` ON ((`f`.`genuskey` = `n`.`genusKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`gbifKey` = `n`.`acceptedKey`))) 
    union select distinct
        `f`.`genus` AS `genus`
    from
        ((`backbone_col`.`genus` `f`
        join `backbone_col`.`name` `n` ON ((`f`.`genuskey` = `n`.`genusKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`colKey` = `n`.`acceptedKey`))) 
    union select distinct
        `f`.`genus` AS `genus`
    from
        ((`backbone_ncbi`.`genus` `f`
        join `backbone_ncbi`.`name` `n` ON ((`f`.`genuskey` = `n`.`genusKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`ncbiKey` = `n`.`acceptedKey`)))
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `grouped_fullscientificname` AS
    select 
        `identification`.`fullScientificName` AS `fullscientificname`
    from
        `identification`
    group by `identification`.`fullScientificName`
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `species_view` AS
    select distinct
        `n2`.`canonicalName` AS `species`
    from
        ((`backbone`.`name` `n`
        join `backbone`.`name` `n2` ON ((`n`.`acceptedKey` = `n2`.`namekey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`gbifKey` = `n`.`acceptedKey`)))
    where
        ((`n`.`rank` = 'SPECIES')
            and (`n2`.`rank` = 'SPECIES')) 
    union select distinct
        `n2`.`canonicalName` AS `species`
    from
        ((`backbone_col`.`name` `n`
        join `backbone_col`.`name` `n2` ON ((`n`.`acceptedKey` = `n2`.`namekey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`colKey` = `n`.`acceptedKey`)))
    where
        ((`n`.`rank` = 'SPECIES')
            and (`n2`.`rank` = 'SPECIES')) 
    union select distinct
        `n2`.`canonicalName` AS `species`
    from
        ((`backbone_ncbi`.`name` `n`
        join `backbone_ncbi`.`name` `n2` ON ((`n`.`acceptedKey` = `n2`.`namekey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`ncbiKey` = `n`.`acceptedKey`)))
    where
        ((`n`.`rank` = 'SPECIES')
            and (`n2`.`rank` = 'SPECIES'))

Specific columns and tables

  • The institution has to be filled in bio_datasource (city and institution, for the harvester_factoy like '%Harvester%') - but first the datasource has to be added (metadata update). It will be used for the repository/registry field in the GGBN portal.
  • The parentInstitution table is used for the contacts and must be filled manually.
  • The GGBN registry is based on the NCD software and requires an additional database connection in the config file. Contact us for more information.
  • For user management (login/shopping), a few extra tables are required. Follow the instructions from the file common/controllers/CommonPermissionController.php to create and populate them if you do not already have them from the SQL-Dump. You will also need an extra view for the statistics:
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `shopping_view` AS
    select distinct
        `oc`.`fk_tripleidstoreid` AS `tripleidstoreid`,
        `bs`.`id` AS `data_source_id`,
        `pit`.`parentInstitutionID` AS `parentInstitutionID`,
        `pit`.`institutionShort` AS `institutionShort`,
        `pit`.`logoURL` AS `logoURL`
    from
        ((`bio_datasource` `bs`
        join `occurrence` `oc` ON ((`bs`.`id` = `oc`.`fk_datasourceid`)))
        join `parentInstitution` `pit` ON ((`bs`.`fk_parentInstitutionid` = `pit`.`parentInstitutionID`)))

SOLR core for search within a website

Install apache-nutch (tested with apache-nutch-1.10, https://wiki.apache.org/nutch/NutchTutorial). You need a local SOLR instance (easier that way). Configure in the folder "urls" the urls to seed (here http://data.ggbn.org/) You need another Core on the SOLR server (called nutch).

Go in the installed folder and run: bin/crawl -i -D solr.server.url=http://localhost:8080/solr/nutch urls/ TestCrawl1002/ 2

Stop the SOLR instance for a short moment on the SOLR server, copy the local nutch/data files on the server (also in nutch data, you can choose to delete the old files first, then copy, then check the writing/reading rights in this data folder).

Restart the SOLR instance.

Note: if you have old links in the search results, delete the content of the data folder of SOLR, and run 2 times the crawl function (exact same line).

Add new statistics charts

Example: show overview for the repositories (with filters). The repository will not be an active filter in the global statistics tab. Edit the file StatsController.php.

In the function actionIndex, add following variables:

$repositoriessamples = [ ];
$repositoriesvouchers = [ ];

and the calls to

	try {
			$repositoriesvouchers = $this->getRepositories( $f, 'vouchers' );
		} catch ( \Solarium\Exception\HttpException $e ) {
			Yii::$app->mailer->compose ()->setFrom ( Yii::$app->params ['noreplyMail'] )->setTo ( Yii::$app->params ['feedbackMail'] )->setSubject ( 'Error occured' )->setTextBody ( $e )->send ();
			return $this->render ( '/site/error', [
					"name" => "Oooops",
					"message" => "The SOLR instance seems to be gone.",
					"error" => $e
			] );
		}
		
		try {
			$repositoriessamples = $this->getRepositories ( $f, 'samples' );
		} catch ( \Solarium\Exception\HttpException $e ) {
			Yii::$app->mailer->compose ()->setFrom ( Yii::$app->params ['noreplyMail'] )->setTo ( Yii::$app->params ['feedbackMail'] )->setSubject ( 'Error occured' )->setTextBody ( $e )->send ();
			return $this->render ( '/site/error', [
					"name" => "Oooops",
					"message" => "The SOLR instance seems to be gone.",
					"error" => $e
			] );
		}

and also

'repositoriessamples' => $repositoriessamples,
'repositoriesvouchers' => $repositoriesvouchers,
to the render list

Create the function getRepositories (copy the getKindofunit method and edit the name of the SOLR facet).

public function getRepositories($filter, $type) {
		// Yii::info ( "getKindofunit " . $filter );
		$config = SOLRQueryManager::getConfigStats ();
		// create a client instance
		$client = new Solarium\Client ( $config );
		// set the adapter to http
		$client->setAdapter ( 'Solarium\Core\Client\Adapter\Http' );
	
		// get a select query instance
		$query = $client->createSelect ();
		// set fields to fetch (this overrides the default setting 'all fields')
		$query->setFields ( [
				"[]"
		] );
	
		$q = "sampletype:*";
		if ($type === "vouchers")
			$q = "sampletype:(culture OR specimen OR unknown OR eVoucher)";
			if ($type === "samples")
				$q = "-sampletype:(culture OR specimen OR unknown OR eVoucher)";
				if (! empty ( $filter ))
					$q .= " AND " . $filter;
					$q = str_replace ( "registry", "institution", $q );
					$query->setQuery ( $q );
	
	
					// get the facetset component
					$facetSet = $query->getFacetSet ();
					$facetSet->setLimit ( - 1 );
					$facetSet->setMinCount ( 1 );
					$facetSet->setSort ( "false" );
	
					// create a facet field instance and set options
					$facetSet->createFacetField ( "institution" )->setField ( "institution" );
					// this executes the query and returns the result
					$raw_response = $client->select ( $query );
	
					$result = $raw_response->getFacetSet ()->getFacet ( "institution" );
					$data = [ ];
					foreach ( $result as $doc => $count ) {
						$data [] = '["' . $doc . ' (' . $count . ')",' . $count . ']';
						// Yii::info("SUBLIST: ".$doc);
					}
	
					return "[" . implode ( ",", $data ) . "]";
	}

Now, the view has to be edited, to display the new graphs. Edit the file views/stats/index.php. Add in the "Global" tab two new divs (with new ids)

<div class="col-lg-6" id="chartrepo1" style="width: 500px; height: 400px;"></div>
<div class="col-lg-6" id="chartrepo2" style="width: 500px; height: 400px;"></div>

Edit the Javascript part: copy the code for the $kindofunitdatasamples and edit it

  <?php if(!empty($repositoriessamples) && $repositoriessamples!="[]"):?>
        var data =<?php echo $repositoriessamples;?>;
        var plot1 = jQuery.jqplot ('chartrepo1', [data],
          {
            title: 'Repositories - samples',
            grid: {borderColor: 'white', shadow: false, drawBorder: false},
            seriesDefaults: {
              // Make this a pie chart.
              renderer: jQuery.jqplot.PieRenderer,
              rendererOptions: {
                // Put data labels on the pie slices.
                // By default, labels show the percentage of the slice.
                showDataLabels: true,
                fill: false,
                sliceMargin: 4,
                lineWidth: 5,
              }
            },
            highlighter: {
                show: true,
                sizeAdjust: 7.5
              },
            legend: { show:true, location: 'e' }
          }
        );
	<?php endif;?>

	 <?php if(!empty($repositoriesvouchers) && $repositoriesvouchers!="[]"):?>
     var data =<?php echo $repositoriesvouchers;?>;
     var plot1 = jQuery.jqplot ('chartrepo2', [data],
       {
         title: 'Repositories - vouchers',
         grid: {borderColor: 'white', shadow: false, drawBorder: false},
         seriesDefaults: {
           // Make this a pie chart.
           renderer: jQuery.jqplot.PieRenderer,
           rendererOptions: {
             // Put data labels on the pie slices.
             // By default, labels show the percentage of the slice.
             showDataLabels: true,
             fill: false,
             sliceMargin: 4,
             lineWidth: 5,
           }
         },
         highlighter: {
             show: true,
             sizeAdjust: 7.5
           },
         legend: { show:true, location: 'e' }
       }
     );
	<?php endif;?>

Here you go !

RepoStats.png