"""Reconcile pod IP records kept in etcd under ``/ipam`` with live pods.

Every cycle the script lists running, non-hostNetwork pods with ``kubectl``,
looks each pod up in the ``/ipam`` key space with ``etcdctl`` (by pod name
and by pod IP) and, when a record exists for the pod name but the pod's
current IP does not appear anywhere under ``/ipam``, rewrites the record
under a key derived from the current IP.

The code is written to run unchanged on Python 2 and Python 3.
"""
import os
import sys
import json
import errno
import subprocess
import threading
import time
import re
from signal import SIGTERM, SIGKILL

# Address of the etcd endpoint queried on port 2379 over https.
etcd_endpoint_ip = "18.0.17.5"

# Pod IPs whose etcd records must never be rewritten by fix_pod_record().
ip_whitelist = ["18.0.18.88", "18.0.18.58", "18.0.18.56", "18.0.18.54",
                "18.0.18.46", "18.0.18.93", "18.0.18.92"]

# Middle component of every record key: /ipam/<cidr_with_sharp>/<ip>.
# NOTE(review): presumably 18.0.18.0/24 with '#' replacing '/' -- confirm.
cidr_with_sharp = "18.0.18.0#24"

# Common etcdctl invocation; the two %s are the endpoint IP and the
# sub-command (which may include a shell pipeline).
_ETCDCTL_TEMPLATE = (
    "ETCDCTL_API=3 etcdctl --endpoints=https://%s:2379"
    " --cert /etc/kubernetes/pki/etcd/etcd-client.pem"
    " --cacert /etc/kubernetes/pki/etcd/ca.pem"
    " --key /etc/kubernetes/pki/etcd/etcd-client-key.pem %s"
)

# Pause between two reconciliation cycles, in seconds.
_CHECK_INTERVAL_SECONDS = 5


def _etcdctl_cmd(sub_command):
    """Return the full shell command line running `sub_command` via etcdctl."""
    return _ETCDCTL_TEMPLATE % (etcd_endpoint_ip, sub_command)


def _shell_single_quote(text):
    """Wrap `text` in single quotes for the shell, escaping embedded quotes."""
    return "'%s'" % text.replace("'", "'\"'\"'")


def _to_text(data):
    """Return subprocess output as text (Python 3 pipes yield bytes)."""
    if isinstance(data, bytes) and not isinstance(data, str):
        return data.decode("utf-8", "replace")
    return data


def execute_cmd_raw(cmd_str, terminate_timeout=None, kill_timeout=None):
    '''Run cmd_str through the shell and collect its output.

    The command runs in its own process group. If terminate_timeout is set
    and the command is still running after that many seconds, SIGTERM is
    sent to the group. If kill_timeout is also set, the command is given
    kill_timeout more seconds to exit before SIGKILL is sent to the group.

    Returns a dict with keys 'rc' (exit status), 'stdout' and 'stderr'
    (raw pipe contents).

    Raises ValueError when kill_timeout is given without terminate_timeout.
    '''
    if terminate_timeout is None and kill_timeout is not None:
        raise ValueError('Setting terminate timeout is necessary'
                         ' before setting kill timeout')
    p = subprocess.Popen(cmd_str,
                         shell=True,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         close_fds=True,
                         preexec_fn=os.setpgrp)
    # Set by the caller thread once communicate() has returned, so the
    # watchdog can stop waiting as soon as the command is finished.
    finished = threading.Event()

    def signal_group(sig):
        """Send sig to the command's process group; False if it is gone."""
        try:
            os.killpg(p.pid, sig)
        except OSError as e:
            if e.errno == errno.ESRCH:
                return False
            raise
        return True

    def poll():
        if p.poll() is not None:
            return
        if not signal_group(SIGTERM):
            return
        if kill_timeout is None:
            return
        # Grace period between SIGTERM and SIGKILL.
        if finished.wait(kill_timeout):
            return
        signal_group(SIGKILL)

    timer = None
    if terminate_timeout is not None:
        timer = threading.Timer(terminate_timeout, poll)
        timer.start()
    try:
        out, err = p.communicate()
    finally:
        finished.set()
        if timer is not None:
            timer.cancel()
            timer.join()
    return dict(rc=p.returncode, stdout=out, stderr=err)


def execute_cmd(cmd_str, terminate_timeout=None, kill_timeout=None):
    """Run cmd_str and return the execute_cmd_raw() result dict.

    Returns None when the command exits with a non-zero status; callers use
    that to detect "grep found nothing" as well as genuine failures.
    """
    ret = execute_cmd_raw(cmd_str, terminate_timeout, kill_timeout)
    if ret['rc']:
        return None
    return ret


def delete_pod(ns, pod):
    """Delete pod `pod` in namespace `ns` with kubectl, echoing the command."""
    cmd_del_pod = "kubectl delete pod -n %s %s" % (ns, pod)
    print(cmd_del_pod)
    execute_cmd(cmd_del_pod)


def check_ipam():
    """Find pods whose current IP is missing from /ipam and fix their record.

    A pod is fixed only when a record matching its name exists under /ipam
    while its current IP matches nothing there. All lookups are completed
    before the first fix is applied, so one fix cannot influence the
    lookups of another pod.
    """
    pods_dict = all_pods()
    # pod name -> (current pod IP, record parsed from etcd)
    pending_fixes = {}
    for ip, values in pods_dict.items():
        if not ip or ip in ("None", "null"):
            continue
        name = values['name']

        # NOTE(review): grep -w treats '-' and '.' as word boundaries, so a
        # pod name that is a prefix of another pod's name can match that
        # other pod's record.
        res_get_name = execute_cmd(
            _etcdctl_cmd("get /ipam --prefix |grep -F -w %s" % name))
        if res_get_name is None:
            print("pod %s not exists in etcd" % name)
            continue
        first_line = _to_text(res_get_name["stdout"]).split("\n")[0].strip()
        try:
            json_obj = json.loads(first_line)
        except ValueError:
            # A malformed record must not take the whole loop down.
            print("pod %s record in etcd is not valid json, skip" % name)
            continue

        # -F: the dots of the IP are literal characters, not wildcards.
        res_get_ip = execute_cmd(
            _etcdctl_cmd("get /ipam --prefix |grep -F -w %s" % ip))
        if res_get_ip is None:
            pending_fixes[name] = (ip, json_obj)

    for name, (ip, json_obj) in pending_fixes.items():
        print("need fix ip: %s" % name)
        fix_pod_record(ip, json_obj)


def fix_pod_record(ip, json_obj):
    """Move the etcd record `json_obj` to the key derived from `ip`.

    The record is written under /ipam/<cidr_with_sharp>/<ip> with its "ip"
    field set to `ip`; the key derived from the record's previous "ip" is
    then deleted and the new key is read back and printed. Nothing is done
    for IPs listed in ip_whitelist. `json_obj` itself is not modified.
    """
    if ip in ip_whitelist:
        print("ip in whitelist, skip to process")
        return

    old_etcd_k = "/ipam/%s/%s" % (cidr_with_sharp, json_obj["ip"])
    new_obj = dict(json_obj)
    new_obj["ip"] = ip
    etcd_v = _shell_single_quote(json.dumps(new_obj))
    etcd_k = "/ipam/%s/%s" % (cidr_with_sharp, ip)

    cmd_add_key = _etcdctl_cmd("put %s %s" % (etcd_k, etcd_v))
    cmd_del_key = _etcdctl_cmd("del %s" % old_etcd_k)

    print("cmd_add_key: %s" % cmd_add_key)
    if execute_cmd(cmd_add_key) is None:
        # Deleting the old key now would lose the record altogether.
        print("put failed, keep old key %s" % old_etcd_k)
        return

    # Never delete the key that has just been written.
    if old_etcd_k != etcd_k:
        print("cmd_del_key: %s" % cmd_del_key)
        execute_cmd(cmd_del_key)
    print("=================================")

    res = execute_cmd(_etcdctl_cmd("get %s" % etcd_k))
    if res is not None:
        print(_to_text(res["stdout"]))


def all_pods():
    """Return {podIP: {"namespace": ..., "name": ...}} for eligible pods.

    Only pods in phase "Running" that have a podIP and do not use the host
    network are included. When several pods report the same IP the first
    one listed wins. Returns an empty dict when kubectl fails.
    """
    cmd_get_pods = "kubectl get po -A -o json"
    res = execute_cmd(cmd_get_pods)
    if res is None:
        print("failed to list pods: %s" % cmd_get_pods)
        return {}
    res_pods = json.loads(_to_text(res["stdout"]))

    pods_dict = {}
    for pod in res_pods["items"]:
        status = pod["status"]
        if status["phase"] != "Running":
            continue
        if "podIP" not in status:
            continue
        if pod["spec"].get("hostNetwork") is True:
            continue
        pods_dict.setdefault(status["podIP"],
                             {"namespace": pod["metadata"]["namespace"],
                              "name": pod["metadata"]["name"]})
    return pods_dict


def main():
    """Run check_ipam() forever, pausing between cycles."""
    while True:
        print("start..process")
        check_ipam()
        print("end..process")
        time.sleep(_CHECK_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()