搜索相关

elasticsearch,基于Lucene,用于存储、索引和搜索,支持两种访问方式:java或者http。要求java8。

logstash,数据处理,input和output,中间支持filter

kibana,展示

启动elasticsearch

elasticsearch安装目录运行 bin/elasticsearch

访问 http://localhost:9200/可以看到一些信息

通过url进行搜索
//travelid_db下的users表,id为1的数据
http://localhost:9200/travelid_db/users/1

//搜索金刚,没有指定column
http://localhost:9200/travelid_db/users/_search?q=%E9%87%91%E5%88%9A

//搜索金刚指定column
http://localhost:9200/travelid_db/users/_search?q=nickname:%E9%87%91%E5%88%9A

//DSL查询,可以构建复杂查询
使用php搜索
/**
 * Search hotels in Elasticsearch by hotel name and city.
 *
 * Reads the "q" and "city" inputs from the request and sends a bool/must
 * query to the hq_hotel/hotel search endpoint, so a hit has to match both
 * name_cn and city_name_cn. Plain highlighting is requested for name_cn.
 *
 * @param Request $request request carrying the "q" and "city" inputs
 * @return mixed JSON response built by response()->json(); 'obj' holds the
 *               decoded Elasticsearch reply on success and null on failure
 */
public function searchHotel(Request $request){
    $q = $request->input("q");
    $city = $request->input("city");
    $client = new Client();
    $request_para = [
        "query" => [
            "bool" =>[
                // Both clauses must match: hotel name AND city name.
                "must" => [
                    [
                        "match" => [ "name_cn"=>$q ]
                    ],
                    [
                        "match" => [ "city_name_cn"=>$city ]
                    ]
                ]
            ]
        ],
        "highlight"=> [
            // Optional custom highlight tags, currently disabled:
//                "pre_tags"=> ["<b>"],
//                "post_tags"=> ["</b>"],
            "fields" => ["name_cn" => ["type"=> "plain"]]
        ]
    ];
    $response = $client->request("GET","http://localhost:9200/hq_hotel/hotel/_search",
        [
            "json" => $request_para
        ]);

    // Previously a non-200 status fell off the end of the method and the
    // caller received an empty response; report the failure explicitly.
    if($response->getStatusCode() != 200){
        // NOTE(review): 0 is the code used on the success path below; the
        // non-zero value and the 502 status are assumptions - confirm the
        // error convention expected by API consumers.
        return response()->json(
            [
                'ok' => 1,
                'msg' => 'elasticsearch returned status ' . $response->getStatusCode(),
                'obj' => null
            ], 502);
    }

    $json_string =  $response->getBody()->getContents();

    return response()->json(
        [
            'ok' => 0,
            'msg' => 'ok',
            'obj' => \GuzzleHttp\json_decode($json_string)
        ]);
}
使用python导入数据
# coding=utf-8

from bs4 import BeautifulSoup
import json
import datetime
import os.path

from elasticsearch import Elasticsearch
import elasticsearch.helpers

def es_hotels():
    """Bulk-import the per-city hotel JSON files into the ``hq_hotel`` index.

    Calls ``set_mapping`` first, then loads the city list from ``city_json``.
    For every city whose ``hotellist_cityId<id>.json`` file exists under
    ``hotelJsonFolder``, all hotels in that file are indexed with a single
    bulk request.  After every 100 imported files the running count is
    printed and ``now()`` is called (presumably a timestamp helper defined
    elsewhere in this file - confirm).
    """
    print("start hotels")
    es = Elasticsearch()

    # The index mapping has to be in place before the first bulk request.
    set_mapping(es)

    # Only the parsed list is needed afterwards, so the file is released
    # right away; the ``with`` block closes it, no explicit close() needed.
    with open(city_json) as f:
        city_arr = json.load(f)

    count = 0
    for city in city_arr:
        hotel_json = hotelJsonFolder + "hotellist_cityId%s.json" % city["id"]
        if not os.path.isfile(hotel_json):
            # No hotel dump for this city.
            continue

        # insert one json file
        count += 1

        with open(hotel_json) as hotel_file:
            hotel_arr = json.load(hotel_file)

        actions = [
            {
                "_op_type": "index",
                "_index": "hq_hotel",
                "_type": "hotel",
                "_source": d
            }
            for d in hotel_arr
        ]
        elasticsearch.helpers.bulk(es, actions)

        # When switching to the parallel helper, remember that parallel_bulk
        # returns a generator, so it has to be consumed, e.g.:
        #   from collections import deque
        #   deque(elasticsearch.helpers.parallel_bulk(es, actions), maxlen=0)

        if count % 100 == 0:
            print(count)
            now()

    print("end")
    now()


# Define the index mapping; call this once before running the bulk import.
def set_mapping(es, index_name="hq_hotel", doc_type_name="hotel"):
    """(Re)create ``index_name`` with the hotel field mapping.

    If the index already exists it is deleted first, so every call starts
    from an empty index.

    Args:
        es: Elasticsearch client; only ``es.indices.exists``,
            ``es.indices.delete`` and ``es.indices.create`` are used.
        index_name: Name of the index to (re)create.
        doc_type_name: Document type the mapping is registered under.
            Previously this argument was ignored and the type was always
            "hotel"; the default keeps that behaviour.
    """
    # City and country names are stored un-analyzed; the hotel names are
    # analyzed.
    # NOTE(review): "string" / "not_analyzed" is the pre-5.x mapping syntax -
    # confirm it is still accepted by the Elasticsearch version in use.
    my_mapping = {
        "mappings": {
            doc_type_name: {
                "properties": {
                    "city_name_cn": {
                        "type": "string",
                        "index": "not_analyzed",
                    },
                    "country_name_cn": {
                        "type": "string",
                        "index": "not_analyzed",
                    },
                    "name_cn": {
                        "type": "string",
                        "index": "analyzed",
                    },
                    "name_en": {
                        "type": "string",
                        "index": "analyzed",
                    },
                }
            }
        }
    }

    if es.indices.exists(index=index_name):
        es.indices.delete(index=index_name)
        print("delete exits index " + index_name)

    print("create index")
    es.indices.create(index=index_name, body=my_mapping)
ubuntu上部署

安装java8或者open-jdk8

设置源,参见https://www.elastic.co/guide/en/elasticsearch/reference/1.7/setup-repositories.html

设置配置文件,把 network.host 设为 0.0.0.0,这样内外网均可以访问

注意,服务重启是需要时间,所以立刻访问会出现 connection refused

直接下载部署时,要更改文件属性 sudo chmod -R 777 es_folder

否则报奇怪的权限错误

自启动的尴尬
1. 在/etc/rc.local 加脚本
2. 注意elasticsearch不能以root身份运行,启动前要切换到普通用户,例如 su ubuntu

#!/bin/sh
# Boot-time start-up snippet for /etc/rc.local: launches Elasticsearch and
# Kibana in the background.

# Elasticsearch must not run as root, so it is started as the "ubuntu" user.
# "-c" makes su execute the launcher as a command; without it the path is
# handed to the user's login shell as a script argument.
su ubuntu -c "/data/es/elasticsearch-5.5.1/bin/elasticsearch" &

# NOTE(review): Kibana is not wrapped in su, so it runs as whichever user
# executes this script - confirm that is intended.
/data/es/kibana-5.5.1-linux-x86_64/bin/kibana &

kibana

1. 安装按照官方的文档
2. index pattern : 加载动态index,主要为这类index,比如log,index名字是log-2015.07.08,日志按天划分
其实应该是在同一index下的不同type的。但是kibana还是支持的。
通常我们直接使用index就好,数据过滤都是通过filter的_type来指定不同doc type
3. 可认为是查询的可视化,还可以保存一点中间结果的
使用visualize
1. filter 用来过滤数据,可使用查询语句来搞
2. buckets 类似sql的group by
3. metrics 是数据源的要显示的统计量

logstash

要导入mysql,需要插件logstash-input-jdbc,进入到logstash/bin下,运行

./logstash-plugin install logstash-input-jdbc命令

下载mysql的jdbc驱动,放到lib下

配置文件jdbc_test.conf

# Logstash pipeline: reads rows from the MySQL table "masters" over JDBC,
# tags every event with a "type" field and writes it to the Elasticsearch
# index "travelid_db".
input{
  jdbc{
  # NOTE(review): the path below starts with "~" although an absolute path is
  # asked for - confirm that it resolves on the target machine.
  jdbc_driver_library => "~/Develop/logstash-5.5.1/lib/third-lib/mysql-connector-java-5.1.43-bin.jar"  #absolute path of the MySQL JDBC driver jar downloaded above
                jdbc_driver_class => "com.mysql.jdbc.Driver" #MySQL JDBC driver class
                jdbc_connection_string => "jdbc:mysql://localhost:3306/travelid_db"   #format: jdbc:mysql://IP:PORT/database_name
                jdbc_user => "root"  #database user name
                # NOTE(review): the password is stored in plain text in this file.
                jdbc_password => "5040309511" #database password
                statement => "SELECT * from masters" #SQL query whose result rows are imported
  }
}

filter {
        mutate {
                add_field => {
                        "type" => "bdoor-order"  #add a "type" field with the value bdoor-order
                }
        }
}

output { 
       #stdout { codec => rubydebug } #for testing: print the events to the terminal
       elasticsearch {
        action => create
        index => "travelid_db"
        document_type => "masters"
        # The document id is taken from each event's "id" field.
        document_id => "%{id}"
        hosts => ["http://127.0.0.1:9200"]
        }
}

运行

# Run Logstash with the pipeline defined in config/jdbc_test.conf
bin/logstash -f config/jdbc_test.conf
# Query the imported data with HTTPie
http GET ":9200/travelid_db/masters/_search?q=顾问"

# Same query with curl
curl -XGET 'localhost:9200/travelid_db/masters/_search?pretty&q=顾问'

results matching ""

    No results matching ""