Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
C
crawler
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
backend
crawler
Commits
69628563
Commit
69628563
authored
4 years ago
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
c1b02661
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
50 additions
and
46 deletions
+50
-46
revise_data_from_es.py
write_data_into_es/revise_data_from_es.py
+50
-46
No files found.
write_data_into_es/revise_data_from_es.py
View file @
69628563
...
...
@@ -41,50 +41,54 @@ search_body = {
}
es2_re
=
es2
.
search
(
index
=
target_index
,
doc_type
=
target_type
,
body
=
search_body
)
q3_total
=
es2_re
[
'hits'
][
'total'
]
write_into_scan
=
scan
(
client
=
es2
,
query
=
search_body
,
index
=
target_index
,
doc_type
=
target_type
,
scroll
=
'5m'
,
request_timeout
=
100
)
count
=
0
for
one_scan
in
write_into_scan
:
count
=
count
+
1
line
=
one_scan
[
'_source'
]
doc_id
=
one_scan
[
'_id'
]
try
:
if
line
[
'facevideo_is_assistant'
]
==
1
:
line
[
'facevideo_is_assistant'
]
=
True
else
:
line
[
'facevideo_is_assistant'
]
=
False
except
:
pass
bulk_head
=
'{"index": {"_id":"
%
s"}}'
%
doc_id
data_str
=
json
.
dumps
(
line
,
ensure_ascii
=
False
)
bulk_one_body
=
bulk_head
+
'
\n
'
+
data_str
+
'
\n
'
# print(bulk_one_body)
bulk_all_body
+=
bulk_one_body
# break
if
count
%
500
==
0
:
eror_dic
=
es7
.
bulk
(
index
=
target_index
,
body
=
bulk_all_body
,
request_timeout
=
200
)
bulk_all_body
=
''
if
eror_dic
[
'errors'
]
is
True
:
print
(
eror_dic
[
'items'
])
print
(
bulk_all_body
)
print
(
count
)
if
bulk_all_body
!=
''
:
eror_dic
=
es7
.
bulk
(
body
=
bulk_all_body
,
index
=
target_index
,
request_timeout
=
200
)
if
eror_dic
[
'errors'
]
is
True
:
print
(
eror_dic
)
es7_re
=
es7
.
search
(
index
=
target_index
,
doc_type
=
target_type
,
body
=
search_body
)
bulk_all_body
=
''
#
# q3_total = es2_re['hits']['total']
# write_into_scan = scan(client=es2,
# query=search_body,
# index=target_index,
# doc_type=target_type,
# scroll='5m',
# request_timeout=100
# )
#
# count = 0
# for one_scan in write_into_scan:
# count = count + 1
# line = one_scan['_source']
# doc_id = one_scan['_id']
# try:
# if line['facevideo_is_assistant'] == 1:
# line['facevideo_is_assistant'] = True
# else:
# line['facevideo_is_assistant'] = False
# except:
# pass
#
# bulk_head = '{"index": {"_id":"%s"}}' % doc_id
# data_str = json.dumps(line, ensure_ascii=False)
#
# bulk_one_body = bulk_head + '\n' + data_str + '\n'
# # print(bulk_one_body)
# bulk_all_body += bulk_one_body
# # break
# if count%500 == 0:
#
# eror_dic=es7.bulk(index=target_index,
# body=bulk_all_body, request_timeout=200)
# bulk_all_body=''
# if eror_dic['errors'] is True:
# print(eror_dic['items'])
# print(bulk_all_body)
# print(count)
#
# if bulk_all_body != '':
# eror_dic = es7.bulk(body=bulk_all_body,
# index=target_index,
# request_timeout=200)
# if eror_dic['errors'] is True:
# print(eror_dic)
#
# bulk_all_body = ''
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment