[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
02/03: Fix some package search issues
From: |
Christopher Baines |
Subject: |
02/03: Fix some package search issues |
Date: |
Sat, 26 Sep 2020 11:15:07 -0400 (EDT) |
cbaines pushed a commit to branch master
in repository data-service.
commit faf46565ce688e7730330ef3db563dd39d7a294b
Author: Christopher Baines <mail@cbaines.net>
AuthorDate: Sat Sep 26 16:05:06 2020 +0100
Fix some package search issues
Previously, the name wasn't taken in to account when filtering results, so a
search like "git-annex" wouldn't find the git-annex package, since it's
synopsis or description doesn't include the name.
Filtering on the name made the queries much slower, so to address that, the
filtering by revision is moved to a separate part of the CTE, which means
PostgreSQL filters down the rows by quite a lot before it begins filtering
by
name.
Also, add in a variant of the query without dashes (-) because that helps
with
searches like ruby-engine.
---
guix-data-service/model/package.scm | 31 +++++++++++++++++++------------
1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/guix-data-service/model/package.scm
b/guix-data-service/model/package.scm
index c1de63b..ef20253 100644
--- a/guix-data-service/model/package.scm
+++ b/guix-data-service/model/package.scm
@@ -144,7 +144,19 @@ WHERE data.name IN (SELECT name FROM package_names);"))
(define query
(string-append
"
-WITH search_results AS (
+WITH revision_packages AS (
+ SELECT *
+ FROM packages
+ WHERE packages.id IN (
+ SELECT package_derivations.package_id
+ FROM package_derivations
+ INNER JOIN guix_revision_package_derivations
+ ON package_derivations.id =
guix_revision_package_derivations.package_derivation_id
+ INNER JOIN guix_revisions
+ ON guix_revision_package_derivations.revision_id = guix_revisions.id
+ WHERE guix_revisions.commit = $1
+ )
+), search_results AS (
SELECT DISTINCT ON (packages.name) packages.name,
packages.version, package_synopsis.synopsis,
package_synopsis.locale AS synopsis_locale,
@@ -162,7 +174,7 @@ WITH search_results AS (
ORDER BY licenses.name
) AS license_data
) AS licenses
- FROM packages
+ FROM revision_packages AS packages
INNER JOIN package_metadata
ON packages.package_metadata_id = package_metadata.id
LEFT OUTER JOIN locations
@@ -173,16 +185,11 @@ WITH search_results AS (
ON package_metadata_tsvectors.package_synopsis_id = package_synopsis.id
INNER JOIN package_descriptions
ON package_metadata_tsvectors.package_description_id =
package_descriptions.id
- WHERE packages.id IN (
- SELECT package_derivations.package_id
- FROM package_derivations
- INNER JOIN guix_revision_package_derivations
- ON package_derivations.id =
guix_revision_package_derivations.package_derivation_id
- INNER JOIN guix_revisions
- ON guix_revision_package_derivations.revision_id = guix_revisions.id
- WHERE guix_revisions.commit = $1
+ WHERE (
+ to_tsvector(packages.name) @@ (plainto_tsquery($2) ||
plainto_tsquery(REPLACE($2, '-', ' ')))
+ OR
+ package_metadata_tsvectors.synopsis_and_description @@ plainto_tsquery($2)
)
- AND package_metadata_tsvectors.synopsis_and_description @@
plainto_tsquery($2)
ORDER BY name,
CASE WHEN package_metadata_tsvectors.locale = 'en_US.UTF-8' THEN 2
WHEN package_metadata_tsvectors.locale = $3 THEN 1
@@ -198,7 +205,7 @@ ORDER BY (
setweight(to_tsvector(name), 'A'),
plainto_tsquery($2),
2 -- divide rank by the document length
- ) +
+ ) * 1.5 +
ts_rank_cd(
synopsis_and_description,
plainto_tsquery($2),