Changes search index format to fasten `pod search --full` command.

- Previous implementation was having the drawback of traversing and performing `gsub` on all index strings for each pod specification.
- New implementation stores words as keys and list of pods containing corresponding word inside their specification as the values for corresponding hash keys. Therefore, while searching for a query, we only need to check if query is matched with any key in index hash, if so, we will add corresponding list of spec names to a Set object. Resulted Set object gives us the search result. Using this policy, sources manager can perform a faster search operation.
parent 290eca16
......@@ -274,7 +274,7 @@ module Pod
# @return [Pathname] The file to use to cache the search data.
#
def search_index_file
cache_root + 'search_index.yaml'
cache_root + 'search_index.json'
end
private
......
......@@ -110,25 +110,18 @@ module Pod
#
# @raise If no source including the set can be found.
#
# @note Full text search requires to load the specification for each
# pod, hence is considerably slower.
#
# @return [Array<Set>] The sets that contain the search term.
#
def search_by_name(query, full_text_search = false)
if full_text_search
set_names = []
set_names = Set.new
query_regexp = /#{query}/i
updated_search_index.each do |name, set_data|
texts = [name]
if full_text_search
texts << set_data['authors'].to_s if set_data['authors']
texts << set_data['summary'] if set_data['summary']
texts << set_data['description'] if set_data['description']
updated_search_index.each_value do |word_spec_hash|
word_spec_hash.each_pair do |word, spec_names|
set_names.merge(spec_names) unless word !~ query_regexp
end
set_names << name unless texts.grep(query_regexp).empty?
end
sets = set_names.sort.map do |name|
sets = set_names.map do |name|
aggregate.representative_set(name)
end
else
......@@ -142,39 +135,62 @@ module Pod
sets
end
# Creates or updates the search data and returns it. The search data
# groups by name the following information for each set:
# Returns the search data. If a saved search data exists, retrieves it from file and returns it.
# Else, creates the search data from scratch, saves it to file system, and returns it.
# Search data is grouped by source repos. For each source, it contains a hash where keys are words
# and values are the pod names containing corresponding word.
#
# For each source, list of unique words are generated from the following spec information.
# - version
# - summary
# - description
# - authors
#
# @note This operation is fairly expensive, because of the YAML
# conversion.
#
# @return [Hash{String => String}] The up to date search data.
# @return [Hash{String => Hash{String => Array<String>}}] The up to date search data.
#
def updated_search_index
index = stored_search_index
unless index
# Create index from scratch
UI.puts 'Creating search index..'
index = {}
all.each do |source|
source_name = source.name
index[source_name] = aggregate.generate_search_index_for_source(source)
end
save_search_index(index)
UI.puts 'Search index creation done!'
end
index
end
# Returns the search data stored in the file system.
# If existing data in the file system is not valid, returns nil.
#
def stored_search_index
unless @updated_search_index
if search_index_path.exist?
require 'yaml'
stored_index = YAML.load(search_index_path.read)
if stored_index && stored_index.is_a?(Hash)
search_index = aggregate.update_search_index(stored_index)
else
search_index = aggregate.generate_search_index
require 'json'
index = JSON.parse(search_index_path.read)
if index && index.is_a?(Hash) # TODO: should we also check if hash has correct hierarchy?
return @updated_search_index = index
end
else
search_index = aggregate.generate_search_index
end
@updated_search_index = nil
end
@updated_search_index
end
# Stores given search data in the file system.
# @param [Hash] index
# Index to be saved in file system
#
def save_search_index(index)
require 'json'
@updated_search_index = index
File.open(search_index_path, 'w') do |file|
file.write(search_index.to_yaml)
file.write(@updated_search_index.to_json)
end
@updated_search_index = search_index
end
@updated_search_index
end
# Allows to clear the search index.
......@@ -192,6 +208,35 @@ module Pod
extend Executable
executable :git
# Updates the stored search index if there are changes in spec repos while updating them.
# Update is performed incrementally. Only the changed pods' search data is re-generated and updated.
# @param [Hash{Source => Array<String>}] changed_spec_paths
# A hash containing changed specification paths for each source.
def update_search_index_if_needed(changed_spec_paths)
search_index = nil
changed_spec_paths.each_pair do |source, spec_paths|
source_name = source.name
next unless spec_paths.length > 0
search_index = stored_search_index
updated_pods = source.pods_for_specification_paths(spec_paths)
new_index = aggregate.generate_search_index_for_changes_in_source(source, spec_paths)
next unless search_index && search_index[source_name]
# First traverse search_index and update existing words
# Removed traversed words from new_index after adding to search_index,
# so that only non existing words will remain in new_index after enumeration completes.
search_index[source_name].each_pair do |word, _|
if new_index[word]
search_index[source_name][word] |= new_index[word]
else
search_index[source_name][word] -= updated_pods
end
end
# Now add non existing words remained in new_index to search_index
search_index[source_name].merge!(new_index)
end
save_search_index(search_index) if search_index
end
# Updates the local clone of the spec-repo with the given name or of all
# the git repos if the name is omitted.
#
......@@ -208,11 +253,14 @@ module Pod
sources = git_sources
end
changed_spec_paths = {}
sources.each do |source|
UI.section "Updating spec repo `#{source.name}`" do
Dir.chdir(source.repo) do
begin
prev_commit_hash = (git! %w(rev-parse HEAD)).strip
output = git! %w(pull --ff-only)
changed_spec_paths[source] = (git! %W(diff --name-only #{prev_commit_hash}..HEAD)).strip.split("\n")
UI.puts output if show_output && !config.verbose?
rescue Informative
UI.warn 'CocoaPods was not able to update the ' \
......@@ -224,6 +272,11 @@ module Pod
check_version_information(source.repo)
end
end
# Perform search index update operation as a subprocess.
fork do
update_search_index_if_needed(changed_spec_paths)
exit
end
end
# Returns whether a source is a GIT repo.
......
......@@ -28,7 +28,7 @@ module Bacon
SpecHelper.temporary_directory.mkpath
# TODO
::Pod::SourcesManager.stubs(:search_index_path).returns(temporary_directory + 'search_index.yaml')
::Pod::SourcesManager.stubs(:search_index_path).returns(temporary_directory + 'search_index.json')
old_run_requirement.bind(self).call(description, spec)
end
......
......@@ -161,7 +161,7 @@ module Pod
end
it 'returns the search index file' do
@config.search_index_file.to_s.should.end_with?('search_index.yaml')
@config.search_index_file.to_s.should.end_with?('search_index.json')
end
end
......
......@@ -86,17 +86,7 @@ module Pod
it "generates the search index before performing a search if it doesn't exits" do
SourcesManager.stubs(:all).returns([@test_source])
Source::Aggregate.any_instance.expects(:generate_search_index).returns('BananaLib' => {})
Source::Aggregate.any_instance.expects(:update_search_index).never
SourcesManager.updated_search_index = nil
SourcesManager.search_by_name('BananaLib', true)
end
it 'updates the search index before performing a search if it exits' do
File.open(SourcesManager.search_index_path, 'w') { |file| file.write("---\nBananaLib:\n version: 0.0.1") }
SourcesManager.stubs(:all).returns([@test_source])
Source::Aggregate.any_instance.expects(:generate_search_index).never
Source::Aggregate.any_instance.expects(:update_search_index).returns('BananaLib' => {})
Source::Aggregate.any_instance.expects(:generate_search_index_for_source).with(@test_source).returns('BananaLib' => ['BananaLib'])
SourcesManager.updated_search_index = nil
SourcesManager.search_by_name('BananaLib', true)
end
......@@ -105,7 +95,7 @@ module Pod
SourcesManager.unstub(:search_index_path)
config.cache_root = Config::DEFAULTS[:cache_root]
path = SourcesManager.search_index_path.to_s
path.should.match %r{Library/Caches/CocoaPods/search_index.yaml}
path.should.match %r{Library/Caches/CocoaPods/search_index.json}
end
describe 'managing sources by URL' do
......@@ -241,12 +231,6 @@ module Pod
UI.output.should.match /is up to date/
end
it 'uses the only fast forward git option' do
set_up_test_repo_for_update
SourcesManager.expects(:git!).with { |options| options.should.include? '--ff-only' }
SourcesManager.update(test_repo_path.basename.to_s, true)
end
it 'prints a warning if the update failed' do
UI.warnings = ''
set_up_test_repo_for_update
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment