Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
M
meta_base_code
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
黎涛
meta_base_code
Commits
bdcf6b5a
Commit
bdcf6b5a
authored
Feb 25, 2021
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
d311d1b6
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
95 additions
and
0 deletions
+95
-0
revise_data_from_es.py
utils/revise_data_from_es.py
+95
-0
No files found.
utils/revise_data_from_es.py
0 → 100644
View file @
bdcf6b5a
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 9 16:47:12 2018
@author: zhouyujiang
"""
import
elasticsearch
import
datetime
from
elasticsearch.helpers
import
scan
import
elasticsearch_7
as
e7
# import pandas as pd
import
json
# from func_cal_doc_id import cal_doc_id
# from urllib import parse
# --- Elasticsearch clients ---------------------------------------------
# Client built from the `elasticsearch` package.
hosts, port = '172.16.52.49', 9200
es2 = elasticsearch.Elasticsearch(hosts=hosts, port=port)

# Client built from the `elasticsearch_7` package, using HTTP basic auth.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from configuration instead.
es7 = e7.Elasticsearch(
    hosts="172.16.52.33",
    port=9200,
    http_auth=("elastic", "gengmei!@#"),
)
# es7 = e7.Elasticsearch(hosts="172.16.31.17", port=9000)

# --- Index / type names --------------------------------------------------
target_index, target_type = 'gm-dbmw-suggest-read', '_doc'
# NOTE(review): from_index / from_type are not referenced by the visible
# part of this script -- confirm whether they are still needed.
from_index, from_type = 'gm-dbmw-suggest-old', '_doc'

# Accumulator for the newline-delimited bulk request body.
bulk_all_body = ''

# Bool query with an empty filter list, i.e. no restriction is applied.
search_body = {"query": {"bool": {"filter": []}}}
# Issue the same search (search_body on target_index / target_type) against
# both clusters; only the response from es7 is printed.
_probe_args = {
    "index": target_index,
    "doc_type": target_type,
    "body": search_body,
}
es2_re = es2.search(**_probe_args)
es7_re = es7.search(**_probe_args)
print(es7_re)
#
# q3_total = es2_re['hits']['total']
# Copy every document returned by search_body from target_index on es2 into
# the index of the same name on es7, preserving each document's _id.
# Documents are sent through the bulk API in batches of BULK_BATCH_SIZE.
BULK_BATCH_SIZE = 500


def _flush_bulk(bulk_lines):
    """Send buffered bulk lines to es7 and report item-level failures.

    Args:
        bulk_lines: list of JSON strings, alternating action line and
            document source line. An empty list is a no-op.

    On a response whose 'errors' flag is set, the per-item results and the
    payload that was sent are printed so the failing batch can be inspected.
    """
    if not bulk_lines:
        return
    # Every line of a bulk body, including the last, is newline-terminated.
    bulk_body = '\n'.join(bulk_lines) + '\n'
    eror_dic = es7.bulk(index=target_index, body=bulk_body,
                        request_timeout=200)
    if eror_dic['errors']:
        print(eror_dic['items'])
        # Print the payload that was actually sent (the buffer has not been
        # cleared yet at this point).
        print(bulk_body)


write_into_scan = scan(
    client=es2,
    query=search_body,
    index=target_index,
    doc_type=target_type,
    scroll='5m',
    request_timeout=100,
)

count = 0  # stays 0 when the scan yields nothing
pending_lines = []
for count, one_scan in enumerate(write_into_scan, start=1):
    # NOTE: an earlier revision coerced line['is_online'] to a bool here;
    # that step is disabled, documents are copied unchanged.
    # json.dumps escapes quotes/backslashes in the id, keeping the action
    # line valid JSON for any _id value.
    pending_lines.append(
        json.dumps({"index": {"_id": one_scan['_id']}}, ensure_ascii=False)
    )
    pending_lines.append(json.dumps(one_scan['_source'], ensure_ascii=False))

    if count % BULK_BATCH_SIZE == 0:
        _flush_bulk(pending_lines)
        pending_lines = []
        print(count)  # progress indicator

# Send whatever is left over after the last full batch.
_flush_bulk(pending_lines)
pending_lines = []
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment