Changes the search index format to speed up the `pod search --full` command.

- The previous implementation had the drawback of traversing and performing `gsub` on all index strings for each pod specification.
- The new implementation stores words as keys, and as values the lists of pods whose specifications contain the corresponding word. Therefore, while searching for a query, we only need to check whether the query matches any key in the index hash; if so, we add the corresponding list of spec names to a Set object. The resulting Set object gives us the search result. Using this policy, the sources manager can perform a faster search operation.
parent 290eca16
...@@ -274,7 +274,7 @@ module Pod ...@@ -274,7 +274,7 @@ module Pod
# @return [Pathname] The file to use to cache the search data.
#
def search_index_file
  cache_root.join('search_index.json')
end
private private
......
...@@ -110,25 +110,18 @@ module Pod ...@@ -110,25 +110,18 @@ module Pod
# #
# @raise If no source including the set can be found. # @raise If no source including the set can be found.
# #
# @note Full text search requires to load the specification for each
# pod, hence is considerably slower.
#
# @return [Array<Set>] The sets that contain the search term. # @return [Array<Set>] The sets that contain the search term.
# #
def search_by_name(query, full_text_search = false) def search_by_name(query, full_text_search = false)
if full_text_search if full_text_search
set_names = [] set_names = Set.new
query_regexp = /#{query}/i query_regexp = /#{query}/i
updated_search_index.each do |name, set_data| updated_search_index.each_value do |word_spec_hash|
texts = [name] word_spec_hash.each_pair do |word, spec_names|
if full_text_search set_names.merge(spec_names) unless word !~ query_regexp
texts << set_data['authors'].to_s if set_data['authors']
texts << set_data['summary'] if set_data['summary']
texts << set_data['description'] if set_data['description']
end end
set_names << name unless texts.grep(query_regexp).empty?
end end
sets = set_names.sort.map do |name| sets = set_names.map do |name|
aggregate.representative_set(name) aggregate.representative_set(name)
end end
else else
...@@ -142,41 +135,64 @@ module Pod ...@@ -142,41 +135,64 @@ module Pod
sets sets
end end
# Returns the search data. If a saved search index exists, it is retrieved
# from file and returned. Otherwise the search data is created from scratch,
# saved to the file system, and returned.
#
# The search data is grouped by source repo. For each source it contains a
# hash whose keys are words and whose values are the names of the pods whose
# specification contains the corresponding word. For each source, the list
# of unique words is generated from the following spec information:
#
# - version
# - summary
# - description
# - authors
#
# @return [Hash{String => Hash{String => Array<String>}}] The up to date
#         search data.
#
def updated_search_index
  index = stored_search_index
  unless index
    # No valid cached index on disk: build one covering every known source.
    UI.puts 'Creating search index..'
    index = all.each_with_object({}) do |source, acc|
      acc[source.name] = aggregate.generate_search_index_for_source(source)
    end
    save_search_index(index)
    UI.puts 'Search index creation done!'
  end
  index
end
# Returns the search data stored in the file system, memoizing the result
# in `@updated_search_index`. Returns nil when no index file exists or when
# its content does not deserialize to a Hash.
#
def stored_search_index
  unless @updated_search_index
    if search_index_path.exist?
      require 'json'
      parsed = JSON.parse(search_index_path.read)
      # TODO: should we also check if hash has correct hierarchy?
      @updated_search_index = parsed if parsed.is_a?(Hash)
    end
  end
  @updated_search_index
end
# Stores the given search data in memory and persists it to the file
# system as JSON.
#
# @param [Hash] index
#        Index to be saved in file system
#
def save_search_index(index)
  require 'json'
  @updated_search_index = index
  File.open(search_index_path, 'w') do |file|
    file << @updated_search_index.to_json
  end
end
# Allows to clear the search index.
#
attr_writer :updated_search_index
...@@ -192,6 +208,35 @@ module Pod ...@@ -192,6 +208,35 @@ module Pod
extend Executable extend Executable
executable :git executable :git
# Updates the stored search index if there are changes in spec repos while
# updating them. The update is performed incrementally: only the changed
# pods' search data is re-generated and merged into the stored index.
#
# @param [Hash{Source => Array<String>}] changed_spec_paths
#        A hash containing changed specification paths for each source.
#
def update_search_index_if_needed(changed_spec_paths)
  search_index = nil
  changed_spec_paths.each_pair do |source, spec_paths|
    source_name = source.name
    next unless spec_paths.length > 0
    search_index = stored_search_index
    updated_pods = source.pods_for_specification_paths(spec_paths)
    new_index = aggregate.generate_search_index_for_changes_in_source(source, spec_paths)
    # Only merge incrementally when this source already has stored data.
    next unless search_index && search_index[source_name]
    # First traverse search_index and update the existing words. Each word
    # handled here is removed from new_index, so that after the enumeration
    # completes only words not indexed before remain in new_index.
    search_index[source_name].each_pair do |word, _|
      if new_index[word]
        # The word still occurs in the changed pods: union the pod lists.
        search_index[source_name][word] |= new_index[word]
        # BUG FIX: without this delete, the merge! below would overwrite the
        # unioned list with only the changed pods' names, dropping every
        # unchanged pod that also contains this word from the index.
        new_index.delete(word)
      else
        # None of the changed pods contain this word any longer.
        search_index[source_name][word] -= updated_pods
      end
    end
    # Now add the genuinely new words that remained in new_index.
    search_index[source_name].merge!(new_index)
  end
  save_search_index(search_index) if search_index
end
# Updates the local clone of the spec-repo with the given name or of all # Updates the local clone of the spec-repo with the given name or of all
# the git repos if the name is omitted. # the git repos if the name is omitted.
# #
...@@ -208,11 +253,14 @@ module Pod ...@@ -208,11 +253,14 @@ module Pod
sources = git_sources sources = git_sources
end end
changed_spec_paths = {}
sources.each do |source| sources.each do |source|
UI.section "Updating spec repo `#{source.name}`" do UI.section "Updating spec repo `#{source.name}`" do
Dir.chdir(source.repo) do Dir.chdir(source.repo) do
begin begin
prev_commit_hash = (git! %w(rev-parse HEAD)).strip
output = git! %w(pull --ff-only) output = git! %w(pull --ff-only)
changed_spec_paths[source] = (git! %W(diff --name-only #{prev_commit_hash}..HEAD)).strip.split("\n")
UI.puts output if show_output && !config.verbose? UI.puts output if show_output && !config.verbose?
rescue Informative rescue Informative
UI.warn 'CocoaPods was not able to update the ' \ UI.warn 'CocoaPods was not able to update the ' \
...@@ -224,6 +272,11 @@ module Pod ...@@ -224,6 +272,11 @@ module Pod
check_version_information(source.repo) check_version_information(source.repo)
end end
end end
# Perform search index update operation as a subprocess.
fork do
update_search_index_if_needed(changed_spec_paths)
exit
end
end end
# Returns whether a source is a GIT repo. # Returns whether a source is a GIT repo.
......
...@@ -28,7 +28,7 @@ module Bacon ...@@ -28,7 +28,7 @@ module Bacon
SpecHelper.temporary_directory.mkpath SpecHelper.temporary_directory.mkpath
# TODO # TODO
::Pod::SourcesManager.stubs(:search_index_path).returns(temporary_directory + 'search_index.yaml') ::Pod::SourcesManager.stubs(:search_index_path).returns(temporary_directory + 'search_index.json')
old_run_requirement.bind(self).call(description, spec) old_run_requirement.bind(self).call(description, spec)
end end
......
...@@ -161,7 +161,7 @@ module Pod ...@@ -161,7 +161,7 @@ module Pod
end end
it 'returns the search index file' do it 'returns the search index file' do
@config.search_index_file.to_s.should.end_with?('search_index.yaml') @config.search_index_file.to_s.should.end_with?('search_index.json')
end end
end end
......
...@@ -86,17 +86,7 @@ module Pod ...@@ -86,17 +86,7 @@ module Pod
it "generates the search index before performing a search if it doesn't exits" do it "generates the search index before performing a search if it doesn't exits" do
SourcesManager.stubs(:all).returns([@test_source]) SourcesManager.stubs(:all).returns([@test_source])
Source::Aggregate.any_instance.expects(:generate_search_index).returns('BananaLib' => {}) Source::Aggregate.any_instance.expects(:generate_search_index_for_source).with(@test_source).returns('BananaLib' => ['BananaLib'])
Source::Aggregate.any_instance.expects(:update_search_index).never
SourcesManager.updated_search_index = nil
SourcesManager.search_by_name('BananaLib', true)
end
it 'updates the search index before performing a search if it exits' do
File.open(SourcesManager.search_index_path, 'w') { |file| file.write("---\nBananaLib:\n version: 0.0.1") }
SourcesManager.stubs(:all).returns([@test_source])
Source::Aggregate.any_instance.expects(:generate_search_index).never
Source::Aggregate.any_instance.expects(:update_search_index).returns('BananaLib' => {})
SourcesManager.updated_search_index = nil SourcesManager.updated_search_index = nil
SourcesManager.search_by_name('BananaLib', true) SourcesManager.search_by_name('BananaLib', true)
end end
...@@ -105,7 +95,7 @@ module Pod ...@@ -105,7 +95,7 @@ module Pod
SourcesManager.unstub(:search_index_path) SourcesManager.unstub(:search_index_path)
config.cache_root = Config::DEFAULTS[:cache_root] config.cache_root = Config::DEFAULTS[:cache_root]
path = SourcesManager.search_index_path.to_s path = SourcesManager.search_index_path.to_s
path.should.match %r{Library/Caches/CocoaPods/search_index.yaml} path.should.match %r{Library/Caches/CocoaPods/search_index.json}
end end
describe 'managing sources by URL' do describe 'managing sources by URL' do
...@@ -241,12 +231,6 @@ module Pod ...@@ -241,12 +231,6 @@ module Pod
UI.output.should.match /is up to date/ UI.output.should.match /is up to date/
end end
it 'uses the only fast forward git option' do
set_up_test_repo_for_update
SourcesManager.expects(:git!).with { |options| options.should.include? '--ff-only' }
SourcesManager.update(test_repo_path.basename.to_s, true)
end
it 'prints a warning if the update failed' do it 'prints a warning if the update failed' do
UI.warnings = '' UI.warnings = ''
set_up_test_repo_for_update set_up_test_repo_for_update
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment