GGBN specific configuration

From GGBN Portal Software
Revision as of 09:29, 11 February 2016 by PatriciaKelbert (talk | contribs) (Add new statistics charts)
Jump to: navigation, search

Database

Specific views

  • Several views are required for the counts-box on start page.
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `counts` AS
    select 
        count(`ro`.`occurrenceid`) AS `counts`, 'DNA' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'DNA') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`, 'Tissues' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'tissue') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`, 'Cultures' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'Culture') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`,
        'eVouchers' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'eVoucher') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`,
        'Specimens' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'specimen') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`, 'Unknown' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'unknown') 
    union select 
        count(`ro`.`occurrenceid`) AS `counts`, 'Enviros' AS `kind`
    from
        (`ggbn_index`.`rawoccurrence` `ro`
        join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
    where
        (`uk`.`kindofunit_clean` = 'environmental sample') 
    union select 
        count(`grouped_fullscientificname`.`fullscientificname`) AS `counts`,
        'Taxa' AS `kind`
    from
        `ggbn_index`.`grouped_fullscientificname` 
    union select 
        count(distinct `families_view`.`family`) AS `counts`,
        'Families' AS `kind`
    from
        `ggbn_index`.`families_view` 
    union select 
        count(distinct `genera_view`.`genus`) AS `counts`,
        'Genera' AS `kind`
    from
        `ggbn_index`.`genera_view` 
    union select 
        count(distinct `species_view`.`species`) AS `counts`,
        'Species' AS `kind`
    from
        `ggbn_index`.`species_view`
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `families_view` AS
    select distinct
        `f`.`family` AS `family`
    from
        ((`backbone`.`family` `f`
        join `backbone`.`name` `n` ON ((`f`.`familykey` = `n`.`familyKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`gbifKey` = `n`.`acceptedKey`))) 
    union select distinct
        `f`.`family` AS `family`
    from
        ((`backbone_col`.`family` `f`
        join `backbone_col`.`name` `n` ON ((`f`.`familykey` = `n`.`familyKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`colKey` = `n`.`acceptedKey`))) 
    union select distinct
        `f`.`family` AS `family`
    from
        ((`backbone_ncbi`.`family` `f`
        join `backbone_ncbi`.`name` `n` ON ((`f`.`familykey` = `n`.`familyKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`ncbiKey` = `n`.`acceptedKey`)))
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `genera_view` AS
    select distinct
        `f`.`genus` AS `genus`
    from
        ((`backbone`.`genus` `f`
        join `backbone`.`name` `n` ON ((`f`.`genuskey` = `n`.`genusKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`gbifKey` = `n`.`acceptedKey`))) 
    union select distinct
        `f`.`genus` AS `genus`
    from
        ((`backbone_col`.`genus` `f`
        join `backbone_col`.`name` `n` ON ((`f`.`genuskey` = `n`.`genusKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`colKey` = `n`.`acceptedKey`))) 
    union select distinct
        `f`.`genus` AS `genus`
    from
        ((`backbone_ncbi`.`genus` `f`
        join `backbone_ncbi`.`name` `n` ON ((`f`.`genuskey` = `n`.`genusKey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`ncbiKey` = `n`.`acceptedKey`)))
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `grouped_fullscientificname` AS
    select 
        `identification`.`fullScientificName` AS `fullscientificname`
    from
        `identification`
    group by `identification`.`fullScientificName`
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `species_view` AS
    select distinct
        `n2`.`canonicalName` AS `species`
    from
        ((`backbone`.`name` `n`
        join `backbone`.`name` `n2` ON ((`n`.`acceptedKey` = `n2`.`namekey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`gbifKey` = `n`.`acceptedKey`)))
    where
        ((`n`.`rank` = 'SPECIES')
            and (`n2`.`rank` = 'SPECIES')) 
    union select distinct
        `n2`.`canonicalName` AS `species`
    from
        ((`backbone_col`.`name` `n`
        join `backbone_col`.`name` `n2` ON ((`n`.`acceptedKey` = `n2`.`namekey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`colKey` = `n`.`acceptedKey`)))
    where
        ((`n`.`rank` = 'SPECIES')
            and (`n2`.`rank` = 'SPECIES')) 
    union select distinct
        `n2`.`canonicalName` AS `species`
    from
        ((`backbone_ncbi`.`name` `n`
        join `backbone_ncbi`.`name` `n2` ON ((`n`.`acceptedKey` = `n2`.`namekey`)))
        join `ggbn_index`.`identification` `i` ON ((`i`.`ncbiKey` = `n`.`acceptedKey`)))
    where
        ((`n`.`rank` = 'SPECIES')
            and (`n2`.`rank` = 'SPECIES'))

Specific columns and tables

  • The institution has to be filled in bio_datasource (city and institution, for the harvester_factoy like '%Harvester%') - but first the datasource has to be added (metadata update). It will be used for the repository/registry field in the GGBN portal.
  • The parentInstitution table is used for the contacts and must be filled manually.
  • The GGBN registry is based on the NCD software and requires an additional database connection in the config file. Contact us for more information.
  • For user management (login/shopping), a few extra tables are required. Follow the instructions from the file common/controllers/CommonPermissionController.php to create and populate them if you do not already have them from the SQL-Dump. You will also need an extra view for the statistics:
CREATE 
    ALGORITHM = UNDEFINED 
    DEFINER = `root`@`localhost` 
    SQL SECURITY DEFINER
VIEW `shopping_view` AS
    select distinct
        `oc`.`fk_tripleidstoreid` AS `tripleidstoreid`,
        `bs`.`id` AS `data_source_id`,
        `pit`.`parentInstitutionID` AS `parentInstitutionID`,
        `pit`.`institutionShort` AS `institutionShort`,
        `pit`.`logoURL` AS `logoURL`
    from
        ((`bio_datasource` `bs`
        join `occurrence` `oc` ON ((`bs`.`id` = `oc`.`fk_datasourceid`)))
        join `parentInstitution` `pit` ON ((`bs`.`fk_parentInstitutionid` = `pit`.`parentInstitutionID`)))

SOLR core for search within a website

Install apache-nutch (tested with apache-nutch-1.10, https://wiki.apache.org/nutch/NutchTutorial). You need a local SOLR instance (easier that way). Configure in the folder "urls" the urls to seed (here http://data.ggbn.org/) You need another Core on the SOLR server (called nutch).

Go in the installed folder and run: bin/crawl -i -D solr.server.url=http://localhost:8080/solr/nutch urls/ TestCrawl1002/ 2

Stop the SOLR instance for a short moment on the SOLR server, copy the local nutch/data files on the server (also in nutch data, you can choose to delete the old files first, then copy, then check the writing/reading rights in this data folder).

Restart the SOLR instance.

Note: if you have old links in the search results, delete the content of the data folder of SOLR, and run 2 times the crawl function (exact same line).

Add new statistics charts

Example: show overview for the repositories (with filters). The repository will not be an active filter in the global statistics tab. In the function actionIndex, add following variables:

$repositoriessamples = [ ];
$repositoriesvouchers = [ ];

and the calls to

	try {
			$repositoriesvouchers = $this->getRepositories( $f, 'samples' );
		} catch ( \Solarium\Exception\HttpException $e ) {
			Yii::$app->mailer->compose ()->setFrom ( Yii::$app->params ['noreplyMail'] )->setTo ( Yii::$app->params ['feedbackMail'] )->setSubject ( 'Error occured' )->setTextBody ( $e )->send ();
			return $this->render ( '/site/error', [
					"name" => "Oooops",
					"message" => "The SOLR instance seems to be gone.",
					"error" => $e
			] );
		}
		
		try {
			$repositoriessamples = $this->getRepositories ( $f, 'samples' );
		} catch ( \Solarium\Exception\HttpException $e ) {
			Yii::$app->mailer->compose ()->setFrom ( Yii::$app->params ['noreplyMail'] )->setTo ( Yii::$app->params ['feedbackMail'] )->setSubject ( 'Error occured' )->setTextBody ( $e )->send ();
			return $this->render ( '/site/error', [
					"name" => "Oooops",
					"message" => "The SOLR instance seems to be gone.",
					"error" => $e
			] );
		}

and also

'repositoriessamples' => $repositoriessamples,
'repositoriesvouchers' => $repositoriesvouchers,
to the render list

Create the function getRepositories (copy the getKindofunit method and edit the name of the SOLR facet).

public function getRepositories($filter, $type) {
		// Yii::info ( "getKindofunit " . $filter );
		$config = SOLRQueryManager::getConfigStats ();
		// create a client instance
		$client = new Solarium\Client ( $config );
		// set the adapter to http
		$client->setAdapter ( 'Solarium\Core\Client\Adapter\Http' );
	
		// get a select query instance
		$query = $client->createSelect ();
		// set fields to fetch (this overrides the default setting 'all fields')
		$query->setFields ( [
				"[]"
		] );
	
		$q = "sampletype:*";
		if ($type === "vouchers")
			$q = "sampletype:(culture OR specimen OR unknown OR eVoucher)";
			if ($type === "samples")
				$q = "-sampletype:(culture OR specimen OR unknown OR eVoucher)";
				if (! empty ( $filter ))
					$q .= " AND " . $filter;
					$q = str_replace ( "registry", "institution", $q );
					$query->setQuery ( $q );
	
	
					// get the facetset component
					$facetSet = $query->getFacetSet ();
					$facetSet->setLimit ( - 1 );
					$facetSet->setMinCount ( 1 );
					$facetSet->setSort ( "false" );
	
					// create a facet field instance and set options
					$facetSet->createFacetField ( "institution" )->setField ( "institution" );
					// this executes the query and returns the result
					$raw_response = $client->select ( $query );
	
					$result = $raw_response->getFacetSet ()->getFacet ( "institution" );
					$data = [ ];
					foreach ( $result as $doc => $count ) {
						$data [] = '["' . $doc . ' (' . $count . ')",' . $count . ']';
						// Yii::info("SUBLIST: ".$doc);
					}
	
					return "[" . implode ( ",", $data ) . "]";
	}