Difference between revisions of "GGBN specific configuration"
(→Add new statistics charts) |
|||
| Line 215: | Line 215: | ||
=Add new statistics charts= | =Add new statistics charts= | ||
| + | Example: show overview for the repositories (with filters). The repository will '''not''' be an active filter in the global statistics tab. | ||
| + | In the function actionIndex, add following variables: | ||
| + | <pre> | ||
| + | $repositoriessamples = [ ]; | ||
| + | $repositoriesvouchers = [ ]; | ||
| + | </pre> | ||
| + | and the calls to | ||
| + | <pre> | ||
| + | try { | ||
| + | $repositoriesvouchers = $this->getRepositories( $f, 'samples' ); | ||
| + | } catch ( \Solarium\Exception\HttpException $e ) { | ||
| + | Yii::$app->mailer->compose ()->setFrom ( Yii::$app->params ['noreplyMail'] )->setTo ( Yii::$app->params ['feedbackMail'] )->setSubject ( 'Error occured' )->setTextBody ( $e )->send (); | ||
| + | return $this->render ( '/site/error', [ | ||
| + | "name" => "Oooops", | ||
| + | "message" => "The SOLR instance seems to be gone.", | ||
| + | "error" => $e | ||
| + | ] ); | ||
| + | } | ||
| + | |||
| + | try { | ||
| + | $repositoriessamples = $this->getRepositories ( $f, 'samples' ); | ||
| + | } catch ( \Solarium\Exception\HttpException $e ) { | ||
| + | Yii::$app->mailer->compose ()->setFrom ( Yii::$app->params ['noreplyMail'] )->setTo ( Yii::$app->params ['feedbackMail'] )->setSubject ( 'Error occured' )->setTextBody ( $e )->send (); | ||
| + | return $this->render ( '/site/error', [ | ||
| + | "name" => "Oooops", | ||
| + | "message" => "The SOLR instance seems to be gone.", | ||
| + | "error" => $e | ||
| + | ] ); | ||
| + | } | ||
| + | </pre> | ||
| + | and also | ||
| + | <pre> | ||
| + | 'repositoriessamples' => $repositoriessamples, | ||
| + | 'repositoriesvouchers' => $repositoriesvouchers, | ||
| + | </pre> to the render list | ||
| + | |||
| + | Create the function <code>getRepositories</code> (copy the getKindofunit method and edit the name of the SOLR facet). | ||
| + | <pre> | ||
| + | public function getRepositories($filter, $type) { | ||
| + | // Yii::info ( "getKindofunit " . $filter ); | ||
| + | $config = SOLRQueryManager::getConfigStats (); | ||
| + | // create a client instance | ||
| + | $client = new Solarium\Client ( $config ); | ||
| + | // set the adapter to http | ||
| + | $client->setAdapter ( 'Solarium\Core\Client\Adapter\Http' ); | ||
| + | |||
| + | // get a select query instance | ||
| + | $query = $client->createSelect (); | ||
| + | // set fields to fetch (this overrides the default setting 'all fields') | ||
| + | $query->setFields ( [ | ||
| + | "[]" | ||
| + | ] ); | ||
| + | |||
| + | $q = "sampletype:*"; | ||
| + | if ($type === "vouchers") | ||
| + | $q = "sampletype:(culture OR specimen OR unknown OR eVoucher)"; | ||
| + | if ($type === "samples") | ||
| + | $q = "-sampletype:(culture OR specimen OR unknown OR eVoucher)"; | ||
| + | if (! empty ( $filter )) | ||
| + | $q .= " AND " . $filter; | ||
| + | $q = str_replace ( "registry", "institution", $q ); | ||
| + | $query->setQuery ( $q ); | ||
| + | |||
| + | |||
| + | // get the facetset component | ||
| + | $facetSet = $query->getFacetSet (); | ||
| + | $facetSet->setLimit ( - 1 ); | ||
| + | $facetSet->setMinCount ( 1 ); | ||
| + | $facetSet->setSort ( "false" ); | ||
| + | |||
| + | // create a facet field instance and set options | ||
| + | $facetSet->createFacetField ( "institution" )->setField ( "institution" ); | ||
| + | // this executes the query and returns the result | ||
| + | $raw_response = $client->select ( $query ); | ||
| + | |||
| + | $result = $raw_response->getFacetSet ()->getFacet ( "institution" ); | ||
| + | $data = [ ]; | ||
| + | foreach ( $result as $doc => $count ) { | ||
| + | $data [] = '["' . $doc . ' (' . $count . ')",' . $count . ']'; | ||
| + | // Yii::info("SUBLIST: ".$doc); | ||
| + | } | ||
| + | |||
| + | return "[" . implode ( ",", $data ) . "]"; | ||
| + | } | ||
| + | </pre> | ||
Revision as of 09:29, 11 February 2016
Contents
Database
Specific views
- Several views are required for the counts-box on start page.
CREATE
ALGORITHM = UNDEFINED
DEFINER = `root`@`localhost`
SQL SECURITY DEFINER
VIEW `counts` AS
select
count(`ro`.`occurrenceid`) AS `counts`, 'DNA' AS `kind`
from
(`ggbn_index`.`rawoccurrence` `ro`
join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
where
(`uk`.`kindofunit_clean` = 'DNA')
union select
count(`ro`.`occurrenceid`) AS `counts`, 'Tissues' AS `kind`
from
(`ggbn_index`.`rawoccurrence` `ro`
join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
where
(`uk`.`kindofunit_clean` = 'tissue')
union select
count(`ro`.`occurrenceid`) AS `counts`, 'Cultures' AS `kind`
from
(`ggbn_index`.`rawoccurrence` `ro`
join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
where
(`uk`.`kindofunit_clean` = 'Culture')
union select
count(`ro`.`occurrenceid`) AS `counts`,
'eVouchers' AS `kind`
from
(`ggbn_index`.`rawoccurrence` `ro`
join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
where
(`uk`.`kindofunit_clean` = 'eVoucher')
union select
count(`ro`.`occurrenceid`) AS `counts`,
'Specimens' AS `kind`
from
(`ggbn_index`.`rawoccurrence` `ro`
join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
where
(`uk`.`kindofunit_clean` = 'specimen')
union select
count(`ro`.`occurrenceid`) AS `counts`, 'Unknown' AS `kind`
from
(`ggbn_index`.`rawoccurrence` `ro`
join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
where
(`uk`.`kindofunit_clean` = 'unknown')
union select
count(`ro`.`occurrenceid`) AS `counts`, 'Enviros' AS `kind`
from
(`ggbn_index`.`rawoccurrence` `ro`
join `ggbn_index`.`unitkind` `uk` ON ((`ro`.`fk_kindofunitid` = `uk`.`unitkindid`)))
where
(`uk`.`kindofunit_clean` = 'environmental sample')
union select
count(`grouped_fullscientificname`.`fullscientificname`) AS `counts`,
'Taxa' AS `kind`
from
`ggbn_index`.`grouped_fullscientificname`
union select
count(distinct `families_view`.`family`) AS `counts`,
'Families' AS `kind`
from
`ggbn_index`.`families_view`
union select
count(distinct `genera_view`.`genus`) AS `counts`,
'Genera' AS `kind`
from
`ggbn_index`.`genera_view`
union select
count(distinct `species_view`.`species`) AS `counts`,
'Species' AS `kind`
from
`ggbn_index`.`species_view`
CREATE
ALGORITHM = UNDEFINED
DEFINER = `root`@`localhost`
SQL SECURITY DEFINER
VIEW `families_view` AS
select distinct
`f`.`family` AS `family`
from
((`backbone`.`family` `f`
join `backbone`.`name` `n` ON ((`f`.`familykey` = `n`.`familyKey`)))
join `ggbn_index`.`identification` `i` ON ((`i`.`gbifKey` = `n`.`acceptedKey`)))
union select distinct
`f`.`family` AS `family`
from
((`backbone_col`.`family` `f`
join `backbone_col`.`name` `n` ON ((`f`.`familykey` = `n`.`familyKey`)))
join `ggbn_index`.`identification` `i` ON ((`i`.`colKey` = `n`.`acceptedKey`)))
union select distinct
`f`.`family` AS `family`
from
((`backbone_ncbi`.`family` `f`
join `backbone_ncbi`.`name` `n` ON ((`f`.`familykey` = `n`.`familyKey`)))
join `ggbn_index`.`identification` `i` ON ((`i`.`ncbiKey` = `n`.`acceptedKey`)))
CREATE
ALGORITHM = UNDEFINED
DEFINER = `root`@`localhost`
SQL SECURITY DEFINER
VIEW `genera_view` AS
select distinct
`f`.`genus` AS `genus`
from
((`backbone`.`genus` `f`
join `backbone`.`name` `n` ON ((`f`.`genuskey` = `n`.`genusKey`)))
join `ggbn_index`.`identification` `i` ON ((`i`.`gbifKey` = `n`.`acceptedKey`)))
union select distinct
`f`.`genus` AS `genus`
from
((`backbone_col`.`genus` `f`
join `backbone_col`.`name` `n` ON ((`f`.`genuskey` = `n`.`genusKey`)))
join `ggbn_index`.`identification` `i` ON ((`i`.`colKey` = `n`.`acceptedKey`)))
union select distinct
`f`.`genus` AS `genus`
from
((`backbone_ncbi`.`genus` `f`
join `backbone_ncbi`.`name` `n` ON ((`f`.`genuskey` = `n`.`genusKey`)))
join `ggbn_index`.`identification` `i` ON ((`i`.`ncbiKey` = `n`.`acceptedKey`)))
CREATE
ALGORITHM = UNDEFINED
DEFINER = `root`@`localhost`
SQL SECURITY DEFINER
VIEW `grouped_fullscientificname` AS
select
`identification`.`fullScientificName` AS `fullscientificname`
from
`identification`
group by `identification`.`fullScientificName`
CREATE
ALGORITHM = UNDEFINED
DEFINER = `root`@`localhost`
SQL SECURITY DEFINER
VIEW `species_view` AS
select distinct
`n2`.`canonicalName` AS `species`
from
((`backbone`.`name` `n`
join `backbone`.`name` `n2` ON ((`n`.`acceptedKey` = `n2`.`namekey`)))
join `ggbn_index`.`identification` `i` ON ((`i`.`gbifKey` = `n`.`acceptedKey`)))
where
((`n`.`rank` = 'SPECIES')
and (`n2`.`rank` = 'SPECIES'))
union select distinct
`n2`.`canonicalName` AS `species`
from
((`backbone_col`.`name` `n`
join `backbone_col`.`name` `n2` ON ((`n`.`acceptedKey` = `n2`.`namekey`)))
join `ggbn_index`.`identification` `i` ON ((`i`.`colKey` = `n`.`acceptedKey`)))
where
((`n`.`rank` = 'SPECIES')
and (`n2`.`rank` = 'SPECIES'))
union select distinct
`n2`.`canonicalName` AS `species`
from
((`backbone_ncbi`.`name` `n`
join `backbone_ncbi`.`name` `n2` ON ((`n`.`acceptedKey` = `n2`.`namekey`)))
join `ggbn_index`.`identification` `i` ON ((`i`.`ncbiKey` = `n`.`acceptedKey`)))
where
((`n`.`rank` = 'SPECIES')
and (`n2`.`rank` = 'SPECIES'))
Specific columns and tables
- The institution has to be filled in bio_datasource (city and institution, for the harvester_factoy like '%Harvester%') - but first the datasource has to be added (metadata update). It will be used for the repository/registry field in the GGBN portal.
- The parentInstitution table is used for the contacts and must be filled manually.
- The GGBN registry is based on the NCD software and requires an additional database connection in the config file. Contact us for more information.
- For user management (login/shopping), a few extra tables are required. Follow the instructions from the file common/controllers/CommonPermissionController.php to create and populate them if you do not already have them from the SQL-Dump. You will also need an extra view for the statistics:
CREATE
ALGORITHM = UNDEFINED
DEFINER = `root`@`localhost`
SQL SECURITY DEFINER
VIEW `shopping_view` AS
select distinct
`oc`.`fk_tripleidstoreid` AS `tripleidstoreid`,
`bs`.`id` AS `data_source_id`,
`pit`.`parentInstitutionID` AS `parentInstitutionID`,
`pit`.`institutionShort` AS `institutionShort`,
`pit`.`logoURL` AS `logoURL`
from
((`bio_datasource` `bs`
join `occurrence` `oc` ON ((`bs`.`id` = `oc`.`fk_datasourceid`)))
join `parentInstitution` `pit` ON ((`bs`.`fk_parentInstitutionid` = `pit`.`parentInstitutionID`)))
SOLR core for search within a website
Install apache-nutch (tested with apache-nutch-1.10, https://wiki.apache.org/nutch/NutchTutorial). You need a local SOLR instance (easier that way). Configure in the folder "urls" the urls to seed (here http://data.ggbn.org/) You need another Core on the SOLR server (called nutch).
Go in the installed folder and run: bin/crawl -i -D solr.server.url=http://localhost:8080/solr/nutch urls/ TestCrawl1002/ 2
Stop the SOLR instance for a short moment on the SOLR server, copy the local nutch/data files on the server (also in nutch data, you can choose to delete the old files first, then copy, then check the writing/reading rights in this data folder).
Restart the SOLR instance.
Note: if you have old links in the search results, delete the content of the data folder of SOLR, and run 2 times the crawl function (exact same line).
Add new statistics charts
Example: show overview for the repositories (with filters). The repository will not be an active filter in the global statistics tab. In the function actionIndex, add following variables:
$repositoriessamples = [ ]; $repositoriesvouchers = [ ];
and the calls to
try {
$repositoriesvouchers = $this->getRepositories( $f, 'samples' );
} catch ( \Solarium\Exception\HttpException $e ) {
Yii::$app->mailer->compose ()->setFrom ( Yii::$app->params ['noreplyMail'] )->setTo ( Yii::$app->params ['feedbackMail'] )->setSubject ( 'Error occured' )->setTextBody ( $e )->send ();
return $this->render ( '/site/error', [
"name" => "Oooops",
"message" => "The SOLR instance seems to be gone.",
"error" => $e
] );
}
try {
$repositoriessamples = $this->getRepositories ( $f, 'samples' );
} catch ( \Solarium\Exception\HttpException $e ) {
Yii::$app->mailer->compose ()->setFrom ( Yii::$app->params ['noreplyMail'] )->setTo ( Yii::$app->params ['feedbackMail'] )->setSubject ( 'Error occured' )->setTextBody ( $e )->send ();
return $this->render ( '/site/error', [
"name" => "Oooops",
"message" => "The SOLR instance seems to be gone.",
"error" => $e
] );
}
and also
'repositoriessamples' => $repositoriessamples, 'repositoriesvouchers' => $repositoriesvouchers,to the render list
Create the function getRepositories (copy the getKindofunit method and edit the name of the SOLR facet).
public function getRepositories($filter, $type) {
// Yii::info ( "getKindofunit " . $filter );
$config = SOLRQueryManager::getConfigStats ();
// create a client instance
$client = new Solarium\Client ( $config );
// set the adapter to http
$client->setAdapter ( 'Solarium\Core\Client\Adapter\Http' );
// get a select query instance
$query = $client->createSelect ();
// set fields to fetch (this overrides the default setting 'all fields')
$query->setFields ( [
"[]"
] );
$q = "sampletype:*";
if ($type === "vouchers")
$q = "sampletype:(culture OR specimen OR unknown OR eVoucher)";
if ($type === "samples")
$q = "-sampletype:(culture OR specimen OR unknown OR eVoucher)";
if (! empty ( $filter ))
$q .= " AND " . $filter;
$q = str_replace ( "registry", "institution", $q );
$query->setQuery ( $q );
// get the facetset component
$facetSet = $query->getFacetSet ();
$facetSet->setLimit ( - 1 );
$facetSet->setMinCount ( 1 );
$facetSet->setSort ( "false" );
// create a facet field instance and set options
$facetSet->createFacetField ( "institution" )->setField ( "institution" );
// this executes the query and returns the result
$raw_response = $client->select ( $query );
$result = $raw_response->getFacetSet ()->getFacet ( "institution" );
$data = [ ];
foreach ( $result as $doc => $count ) {
$data [] = '["' . $doc . ' (' . $count . ')",' . $count . ']';
// Yii::info("SUBLIST: ".$doc);
}
return "[" . implode ( ",", $data ) . "]";
}