December 2018

29 Dec

Installing and using Impala

pip install six
pip install bit_array
pip install thriftpy
pip install sasl-0.2.1-cp36-cp36m-win32.whl
pip install thrift_sasl
pip install bitarray-0.8.3-cp36-cp36m-win32.whl
pip install impyla
pip install pandas
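
The cp36/win32 tags in those wheel filenames have to match your interpreter, or pip rejects them. A quick sanity check before installing (standard library only; nothing beyond plain Python assumed):

import platform
import struct

# cp36 wheels need CPython 3.6.x
print(platform.python_implementation(), platform.python_version())
# prints 32 on a 32-bit build, which is what the win32 wheels expect
print(struct.calcsize('P') * 8)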

Test code
from impala.dbapi import connect
from impala.util import as_pandas

conn = connect(host='10.86.45.40', port=21050)
cur = conn.cursor()

cur.execute('SHOW TABLES')

data = as_pandas(cur)

print(data)

cur.execute('SELECT * FROM geelylogdatawarehouse limit 10')

# deep offsets need a deterministic order; row_number() exposes the row index
cur.execute('SELECT row_number() over (order by appid) as rnum, * FROM geelylogdatawarehouse where ymd = 20171230 order by appid limit 10 offset 1000000')

cur.execute('SELECT count(*) FROM geelylogdatawarehouse where ymd = 20171230')

data = cur.fetchall()

print(data)

description = cur.description  # column metadata for the most recent query

print(description)

# fetchall() exhausts the cursor, so re-run a query before iterating row by row
cur.execute('SELECT * FROM geelylogdatawarehouse limit 10')
for row in cur:
    print(row[4])

# likewise re-run before handing the cursor to as_pandas
cur.execute('SELECT * FROM geelylogdatawarehouse limit 10')
data = as_pandas(cur)
print(data)
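
For large result sets, as_pandas loads everything into memory at once; the standard DB-API fetchmany call pages through the results in fixed-size chunks instead. A minimal sketch on the same cursor (the 10000 batch size is an arbitrary illustration):

cur.execute('SELECT * FROM geelylogdatawarehouse where ymd = 20171230')
while True:
    batch = cur.fetchmany(10000)  # at most 10000 rows per call
    if not batch:                 # an empty list means the result set is drained
        break
    for row in batch:
        pass  # process each row here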

Error:
thriftpy.parser.exc.ThriftParserError: ThriftPy does not support generating module with path in protocol 'c'
Fix:
As the traceback suggests, open thriftpy's parser.py and, at line 488, change if url_scheme == '': to if len(url_scheme) <= 1:
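
Why this works: on Windows, thriftpy runs the .thrift file path through urlparse, which mistakes the drive letter for a one-character URL scheme named 'c', so the original equality test against '' fails. A quick demonstration (the paths are illustrative):

from urllib.parse import urlparse

print(urlparse(r'C:\impala\ImpalaService.thrift').scheme)  # 'c' -- the drive letter
print(urlparse('/home/user/ImpalaService.thrift').scheme)  # '' -- POSIX paths have no scheme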

    1. The .whl files need to be downloaded from https://www.lfd.uci.edu/~gohlke/pythonlibs/,
      or use the versions in the attachment below.
    2. On CentOS: sudo yum install gcc-c++ python-devel.x86_64 cyrus-sasl-devel.x86_64, then pip install sasl.
    3. Installing impyla directly will instead fail with: Microsoft Visual C++ 14.0 is required. Get it with "Microsoft Visual
      C++ Build Tools": http://landinghub.visualstudio.com/visual-cpp-build-tools

whl.rar

11 Dec

Kafka test unit

package TestKafka;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.TopicPartition;

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class TestProducer {

public static void runProducer(int i) {
    Properties props = new Properties();
    props.put("bootstrap.servers", "10.190.35.131:9092");
    props.put("acks", "all");
    props.put("retries ", 1);
    props.put("buffer.memory", 33554432);
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

    Producer<String, String> producer = new KafkaProducer<String, String>(props);
    producer.send(new ProducerRecord<String, String>("test", Integer.toString(i), Integer.toString(i) + "!!"));
    producer.close();
}

public static void runConsumer() {
    KafkaConsumer<String, String> consumer;

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "10.190.35.131:9092");  //10.190.35.131:9092   10.86.45.40:9092
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest"); //"earliest" else "latest"
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
    props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "consumer_group_test");

    consumer = new KafkaConsumer<>(props);

    consumer.subscribe(Arrays.asList("test"));  // log.sweet.request.trace.production
    try {
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(100); //Long.MAX_VALUE
            for (TopicPartition partition : records.partitions()) {
                List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
                for (ConsumerRecord<String, String> record : partitionRecords) {
                    System.out.printf("offset = %d, key = %s, value = %s%n", record.offset(), record.key(), record.value());
                }
            }
            consumer.commitSync();
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        consumer.close();
    }
}

public static void main(String[] args) throws Exception {
    Thread thread = new Thread() {
        public void run() {
            int i = 0;
            while (true) {
                i++;
                runProducer(i);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }
    };
    thread.start();

    runConsumer();


}

}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"

     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.geely.storm.test.producer</groupId>
<artifactId>TestProducer</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
    <kafka.client.version>0.10.0.0</kafka.client.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>${kafka.client.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-streams</artifactId>
        <version>${kafka.client.version}</version>
    </dependency>
</dependencies>

</project>

08 Dec

Installing Kafka (copied straight from the official quickstart)

Original source: http://kafka.apache.org/quickstart

tar -xzf kafka_2.11-2.1.0.tgz

cd kafka_2.11-2.1.0

bin/zookeeper-server-start.sh config/zookeeper.properties

bin/kafka-server-start.sh config/server.properties

Create a topic
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test

List topics
bin/kafka-topics.sh --list --zookeeper localhost:2181

Start a producer
bin/kafka-console-producer.sh --broker-list localhost:9092 --topic test

Start a consumer
bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test --from-beginning

Set up a multi-broker cluster (each broker needs a unique broker.id, port, and log directory)
cp config/server.properties config/server-1.properties
    broker.id=1
    listeners=PLAINTEXT://:9093
    log.dirs=/tmp/kafka-logs-1

cp config/server.properties config/server-2.properties
    broker.id=2
    listeners=PLAINTEXT://:9094
    log.dirs=/tmp/kafka-logs-2

Start the two new brokers
bin/kafka-server-start.sh config/server-1.properties &
bin/kafka-server-start.sh config/server-2.properties &

Create a topic with a replication factor of 3
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 3 --partitions 1 --topic my-replicated-topic

Check the topics' partition status
bin/kafka-topics.sh --describe --zookeeper localhost:2181 --topic my-replicated-topic
bin/kafka-topics.sh --describe --zookeeper localhost:2181 --topic test
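
For reference, the --describe output in the official quickstart looks roughly like this (leader, replica, and ISR ids vary per run):

Topic:my-replicated-topic   PartitionCount:1    ReplicationFactor:3 Configs:
    Topic: my-replicated-topic  Partition: 0    Leader: 1   Replicas: 1,2,0 Isr: 1,2,0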