543 lines
16 KiB
Python
Raw Normal View History

2024-09-14 19:18:59 +08:00
import json, base64, re, string, random, threading, codecs
2024-04-02 15:36:52 +08:00
from collections.abc import Mapping, MutableMapping
2024-01-08 23:37:00 +08:00
from collections import OrderedDict
2024-04-02 15:36:52 +08:00
from urllib.parse import urlencode, urlsplit
from functools import partial
2024-07-08 22:25:59 +08:00
from myutils.config import globalconfig
2024-04-02 15:36:52 +08:00
2024-07-10 12:52:06 +08:00
# Browser-like User-Agent string; Requester_common exposes it as its
# ``default_UA`` attribute and Session copies it into its default headers.
default_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/126.0.0.0 Safari/537.36"
)

# Default request timeout, in seconds (converted to milliseconds before it is
# handed to the backend).
default_timeout = 10
2024-06-27 19:59:38 +08:00
2024-07-09 15:45:34 +08:00
class RequestException(Exception):
    """Base class for the errors raised by this HTTP layer."""
2024-04-02 15:36:52 +08:00
2024-06-27 19:59:38 +08:00
2024-07-09 15:45:34 +08:00
class Timeout(RequestException):
    """Timeout error type.

    Not raised in this file; presumably raised by the backend requesters
    when a request exceeds its timeout.
    """
2024-04-02 15:36:52 +08:00
2024-09-04 19:30:06 +08:00
class HTTPError(RequestException):
    """Raised by ``ResponseBase.raise_for_status`` for 4xx/5xx status codes."""
2024-04-02 15:36:52 +08:00
class CaseInsensitiveDict(MutableMapping):
    """Ordered mapping whose string keys are matched case-insensitively.

    Every entry is stored under the lowercased key together with the key as
    it was last written, so lookups ignore case while iteration yields the
    most recently used spelling.
    """

    def __init__(self, data=None, **kwargs):
        self._store = OrderedDict()
        initial = {} if data is None else data
        self.update(initial, **kwargs)

    def __setitem__(self, key, value):
        # Index by the lowercased key; keep the caller's spelling with the value.
        folded = key.lower()
        self._store[folded] = (key, value)

    def __getitem__(self, key):
        _cased_key, value = self._store[key.lower()]
        return value

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (entry[0] for entry in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Yield ``(lowercased_key, value)`` pairs in insertion order."""
        return ((folded, entry[1]) for folded, entry in self._store.items())

    def __eq__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        # Both sides are compared through their lowercased keys.
        theirs = CaseInsensitiveDict(other)
        return dict(self.lower_items()) == dict(theirs.lower_items())

    def copy(self):
        """Return a new CaseInsensitiveDict with the same cased keys and values."""
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return repr(dict(self.items()))
2024-04-02 15:36:52 +08:00
2024-01-11 00:45:06 +08:00
class ResponseBase:
    """Backend-independent HTTP response.

    Holds the status, headers and cookies of a response and exposes the body
    either as a whole (``content`` / ``text`` / ``json``) or, for streamed
    responses, chunk by chunk (``iter_content`` / ``iter_lines``).  The body
    chunks of a streamed response come from ``iter_content_impl``, which
    subclasses are expected to override.
    """

    # Charset parameter of a Content-Type value.  Parameter names are
    # case-insensitive and the value may be quoted (``charset="utf-8"``).
    _CHARSET_RE = re.compile(r"charset=[\"']?([\w-]+)", re.IGNORECASE)

    def __init__(self, stream):
        self.headers = CaseInsensitiveDict()
        self.stream = stream
        self.url = ""
        self.cookies = {}
        self.status_code = 0
        self.reason = ""
        # Body of a non-streamed response, assigned through the setter below.
        self.__content = b""
        # Chunks of a streamed response, recorded as iter_content yields them.
        self.__content_s = []
        # Set once the streamed body has been read to its end.
        self.content_prepared = threading.Event()
        # True until iter_content has been called; a stream is single-use.
        self.iter_once = True

    @property
    def content(self):
        """Return the whole response body as bytes.

        For a streamed response that has not been iterated yet, the stream is
        drained here.  If iteration was already started elsewhere, this blocks
        until that iteration reaches the end of the body.
        """
        if not self.stream:
            return self.__content
        if self.iter_once:
            for _ in self.iter_content():
                pass
        self.content_prepared.wait()
        return b"".join(self.__content_s)

    @content.setter
    def content(self, c):
        if self.stream:
            raise RequestException("content of a streamed response cannot be set")
        self.__content = c

    @property
    def text(self):
        """Return the body decoded with ``charset``.

        Raises:
            Exception: if the body cannot be decoded; the underlying error is
                attached as ``__cause__``.
        """
        charset = self.charset
        try:
            return self.content.decode(charset)
        except Exception as e:
            # ``except Exception`` (not a bare except) so KeyboardInterrupt and
            # SystemExit are not swallowed; message kept as callers may log it.
            raise Exception("unenable to decode with {}".format(charset)) from e

    @property
    def charset(self):
        """Return the charset named in the Content-Type header, else "utf-8"."""
        content_type = self.headers.get("Content-Type", "")
        m = self._CHARSET_RE.search(content_type)
        return m.group(1) if m else "utf-8"

    def json(self):
        """Parse ``text`` as JSON and return the resulting object."""
        return json.loads(self.text)

    def stream_decode_response_unicode(self, iterator):
        """Incrementally decode an iterator of byte chunks into str chunks.

        Undecodable bytes are replaced rather than raising; empty decode
        results are not yielded.
        """
        decoder = codecs.getincrementaldecoder(self.charset)(errors="replace")
        for chunk in iterator:
            rv = decoder.decode(chunk)
            if rv:
                yield rv
        # Flush whatever the decoder is still buffering.
        rv = decoder.decode(b"", final=True)
        if rv:
            yield rv

    def iter_content(self, chunk_size=1, decode_unicode=False):
        """Return an iterator over the body of a streamed response.

        Every raw chunk is also recorded so that ``content`` can return the
        full body afterwards.

        Raises:
            RequestException: if the response is not streamed, or the stream
                has already been handed out once.
        """
        if not self.stream:
            raise RequestException("iter_content requires a streamed response")
        if not self.iter_once:
            raise RequestException("the response stream can only be iterated once")
        self.iter_once = False

        def _recording_chunks():
            for chunk in self.iter_content_impl(chunk_size):
                self.__content_s.append(chunk)
                yield chunk
            # Only reached when the body was read to its end.
            self.content_prepared.set()

        chunks = _recording_chunks()
        if decode_unicode:
            chunks = self.stream_decode_response_unicode(chunks)
        return chunks

    def iter_content_impl(self, chunk_size=1):
        """Backend hook yielding the raw body chunks; subclasses override it.

        This base implementation returns None.
        """

    def iter_lines(self, chunk_size=512, decode_unicode=False, delimiter=None):
        """Iterate over the streamed body one line at a time.

        Lines are split on ``delimiter`` when given, otherwise with
        ``splitlines()``.  A trailing partial line is held back and joined
        with the next chunk.
        """
        pending = None
        for chunk in self.iter_content(
            chunk_size=chunk_size, decode_unicode=decode_unicode
        ):
            if pending is not None:
                chunk = pending + chunk
            if delimiter:
                lines = chunk.split(delimiter)
            else:
                lines = chunk.splitlines()
            # If the chunk does not end on a line break, its last piece is an
            # incomplete line: keep it for the next round.
            if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]:
                pending = lines.pop()
            else:
                pending = None
            yield from lines
        if pending is not None:
            yield pending

    def raise_for_status(self):
        """Raise HTTPError when ``status_code`` is a 4xx or 5xx code."""
        which = None
        if 400 <= self.status_code < 500:
            which = "Client"
        elif 500 <= self.status_code < 600:
            which = "Server"
        if which:
            http_error_msg = "{code} {which} Error: {text} for url: {url}".format(
                code=self.status_code, which=which, text=self.reason, url=self.url
            )
            raise HTTPError(http_error_msg)
2024-04-02 15:36:52 +08:00
2024-07-08 22:25:59 +08:00
class Requester_common:
    """Backend-independent request preparation shared by the HTTP requesters.

    ``request`` normalises URL, headers, body, proxy and timeout and then
    delegates the transfer to ``request_impl``, which a backend subclass
    overrides.  The ``_parse*`` helpers convert between Python values and
    their on-the-wire form.
    """

    Accept_Encoding = "gzip, deflate, br"
    default_UA = default_UA

    @staticmethod
    def _encode_params(data):
        """URL-encode ``data`` for use as a query string or form body.

        str/bytes, file-like objects and non-iterables are returned unchanged.
        Mappings and iterables of ``(key, value)`` pairs are encoded; a value
        may itself be an iterable of values, and ``None`` values are skipped.
        """
        if isinstance(data, (str, bytes)) or hasattr(data, "read"):
            return data
        if not hasattr(data, "__iter__"):
            return data
        # Accept both mappings and plain sequences of pairs.
        pairs = data.items() if hasattr(data, "items") else data
        result = []
        for k, vs in list(pairs):
            if isinstance(vs, (str, bytes)) or not hasattr(vs, "__iter__"):
                vs = [vs]
            for v in vs:
                if v is not None:
                    result.append(
                        (
                            k.encode("utf-8") if isinstance(k, str) else k,
                            v.encode("utf-8") if isinstance(v, str) else v,
                        )
                    )
        return urlencode(result, doseq=True)

    def _parseurl(self, url: str, param):
        """Split ``url`` and merge ``param`` into its query string.

        Returns:
            ``(scheme, server, port, path, url)`` where ``path`` includes the
            query string and the rebuilt ``url`` carries neither an explicit
            port nor the fragment.

        Raises:
            Exception: for a scheme other than http/https, or a host part
                containing more than one colon.
        """
        url = url.strip()
        scheme, server, path, query, _ = urlsplit(url)
        if scheme not in ["https", "http"]:
            raise Exception("unknown scheme " + scheme)
        default_port = 443 if scheme == "https" else 80
        spl = server.split(":")
        if len(spl) == 2:
            server = spl[0]
            # "host:" (empty port) means the scheme's default port.
            port = int(spl[1]) if spl[1] else default_port
        elif len(spl) == 1:
            port = default_port
        else:
            raise Exception("invalid url")
        if param:
            param = self._encode_params(param)
            query += ("&" if len(query) else "") + param
        if len(query):
            path += "?" + query
        url = scheme + "://" + server + path
        return scheme, server, port, path, url

    @staticmethod
    def _normalize_timeout(timeout):
        """Convert a timeout in seconds into a two-item millisecond value.

        A single number becomes ``(ms, 0)``.  A sequence contributes its first
        two items (a ``None`` item counts as 0) and is padded with 0.  A falsy
        timeout becomes ``(0, 0)``.  Unusable values are reported on stdout
        and replaced by ``[0, 0]`` instead of failing the request.
        """
        if not timeout:
            return (0, 0)
        if isinstance(timeout, (float, int)):
            return (int(timeout * 1000), 0)  # convert to milliseconds
        try:
            millis = [int((t or 0) * 1000) for t in timeout[:2]]
        except Exception:
            print("Error invalid timeout", timeout)
            millis = [0, 0]
        millis.append(0)
        return millis[:2]

    def request(
        self,
        method,
        url,
        params=None,
        data=None,
        headers=None,
        proxies=None,
        json=None,
        cookies=None,
        files=None,
        auth=None,
        timeout=default_timeout,
        allow_redirects=True,
        hooks=None,
        stream=None,
        verify=False,
        cert=None,
    ) -> ResponseBase:
        """Prepare a request and hand it to ``request_impl``.

        The body is taken from ``files`` (multipart), else ``data``, else
        ``json``.  ``headers`` is updated in place with Authorization,
        Content-Length and Content-Type as needed.  ``hooks`` and ``cert`` are
        accepted for signature compatibility but not used here.
        """
        if headers is None:
            # The header assignments below need a real mapping.
            headers = CaseInsensitiveDict()
        if auth and isinstance(auth, tuple) and len(auth) == 2:
            credentials = b":".join(
                (auth[0].encode("latin1"), auth[1].encode("latin1"))
            )
            headers["Authorization"] = (
                "Basic " + base64.b64encode(credentials).strip().decode()
            )
        scheme, server, port, param, url = self._parseurl(url, params)
        databytes = b""
        contenttype = None
        if files:
            contenttype, databytes = self._parsefilesasmultipart(files, headers)
        elif data:
            contenttype, databytes = self._parsedata(data)
        elif json is not None:
            # ``is not None`` so that empty payloads such as {} or [] are sent.
            contenttype, databytes = self._parsejson(json)
        if len(databytes):
            headers["Content-Length"] = str(len(databytes))
        if contenttype and ("Content-Type" not in headers):
            headers["Content-Type"] = contenttype
        proxy = proxies.get(scheme, None) if proxies else None
        proxy = None if proxy == "" else proxy
        timeout = self._normalize_timeout(timeout)
        return self.request_impl(
            method,
            scheme,
            server,
            port,
            param,
            url,
            headers,
            cookies,
            databytes,
            proxy,
            stream,
            verify,
            timeout,
            allow_redirects,
        )

    def request_impl(self, *argc) -> ResponseBase:
        """Perform the transfer; backend subclasses override this stub."""
        ...

    def _parseheader(self, headers: CaseInsensitiveDict, cookies: dict):
        """Return ``"Name: value"`` lines sorted by header name.

        When ``cookies`` is non-empty, ``headers`` is updated in place with
        the resulting Cookie header first.
        """
        if cookies:
            headers.update({"Cookie": self._parsecookie(cookies)})
        return ["{}: {}".format(k, headers[k]) for k in sorted(headers.keys())]

    def _parsecookie(self, cookie: dict):
        """Serialise a cookie dict into a Cookie header value."""
        return "; ".join("{}={}".format(k, v) for k, v in cookie.items())

    def _parsecookiestring(self, cookiestr: str):
        """Parse a ``name=value; name2=value2`` string into a dict.

        Segments without ``=`` (for example a bare ``HttpOnly`` flag) are
        skipped instead of producing a mangled key.
        """
        if not cookiestr:
            return {}
        cookie = {}
        for part in cookiestr.split(";"):
            name, sep, value = part.strip().partition("=")
            if not sep:
                continue
            cookie[name] = value
        return cookie

    def _parseheader2dict(self, headerstr: str):
        """Parse a raw CRLF-separated response header block.

        Returns:
            ``(headers, cookies, reason)``: the headers as a
            CaseInsensitiveDict, the cookies set by Set-Cookie lines, and the
            reason phrase of the status line.
        """
        header = CaseInsensitiveDict()
        cookie = {}
        lines = headerstr.split("\r\n")
        # Status line is "<version> <code> <reason...>".
        reason = " ".join(lines[0].split(" ")[2:])
        for line in lines[1:]:
            name, sep, value = line.partition(":")
            if not sep:
                continue
            # Whitespace around a field value is optional and not part of it.
            value = value.strip()
            if name.lower() == "set-cookie":
                # Only the first name=value pair is the cookie; what follows
                # the first ";" are attributes (Path, Expires, ...).
                cookie.update(self._parsecookiestring(value.split(";", 1)[0]))
            else:
                header[name] = value
        return CaseInsensitiveDict(header), cookie, reason

    def _parsejson(self, _json):
        """Serialise ``_json``; return ``(content_type, body_bytes)``."""
        databytes = json.dumps(_json).encode("utf8")
        contenttype = "application/json"
        return contenttype, databytes

    def _parsedata(self, data):
        """Turn ``data`` into body bytes; return ``(content_type, body_bytes)``.

        str/bytes and file-like bodies are sent as they are, with no content
        type.  Anything else is form-encoded.
        """
        contenttype = None
        databytes = self._encode_params(data)
        raw_body = isinstance(data, (str, bytes)) or hasattr(data, "read")
        if hasattr(databytes, "read"):
            databytes = databytes.read()
        if isinstance(databytes, str):
            databytes = databytes.encode("utf8")
        if not raw_body:
            contenttype = "application/x-www-form-urlencoded"
        return contenttype, databytes

    def _parsefilesasmultipart(self, files: dict, header: dict):
        """Encode ``files`` as a multipart/form-data body.

        Each value is either the field data itself or a ``(filename, data)`` /
        ``(filename, data, content_type)`` tuple; data may be str, bytes or a
        file-like object.  A boundary already present in the Content-Type
        header is reused, otherwise a random one is generated.

        Returns:
            ``(content_type, body_bytes)``.

        Raises:
            ValueError: for a tuple value that has neither 2 nor 3 items.
        """

        def generate_random_string(length=16):
            characters = string.ascii_letters + string.digits
            return "".join(random.choices(characters, k=length))

        _ct = header.get("Content-Type", None) if header else None
        _ct_start = "multipart/form-data; boundary="
        if _ct and _ct.lower().startswith(_ct_start):
            boundary = _ct[len(_ct_start) :]
        else:
            boundary = "----WebKitFormBoundary" + generate_random_string()
            _ct = _ct_start + boundary
        boundary = boundary.encode()
        items = []
        for name, data in files.items():
            # Reset per field so nothing leaks over from the previous one.
            filename = None
            type_ = None
            if isinstance(data, (tuple, list)):
                if len(data) == 3:
                    filename, data, type_ = data
                elif len(data) == 2:
                    filename, data = data
                else:
                    raise ValueError(
                        "files entry {!r} must have 2 or 3 items".format(name)
                    )
            items.append(b"--" + boundary)
            disposition = b'Content-Disposition: form-data; name="'
            disposition += name.encode("utf8")
            disposition += b'"'
            if filename:
                disposition += b'; filename="'
                disposition += filename.encode("utf8")
                disposition += b'"'
            items.append(disposition)
            if type_:
                items.append(b"Content-Type: " + type_.encode("utf8"))
            # Blank line separates the part headers from the part body.
            items.append(b"")
            if hasattr(data, "read"):
                data = data.read()
            if isinstance(data, str):
                data = data.encode("utf8")
            items.append(data)
        items.append(b"--" + boundary + b"--")
        return _ct, b"".join(_ + b"\r\n" for _ in items)
2024-07-11 01:12:48 +08:00
class Session:
    """Keeps default headers and cookies across requests.

    The backend requester is chosen from ``globalconfig["network"]`` and is
    re-created whenever that setting changes.
    """

    def __init__(self):
        self.cookies = {}
        self._requester = None
        # Value of globalconfig["network"] that self._requester was built for.
        self._libidx = -1
        self.headers = CaseInsensitiveDict(
            {
                # "Accept-Encoding" and "User-Agent" are filled in from the
                # requester once it has been created.
                "Accept": "*/*",
                "Connection": "keep-alive",
            }
        )

    def __enter__(self):
        return self

    def __exit__(self, *args):
        pass

    @property
    def requester(self) -> Requester_common:
        """Return the requester for the currently configured backend.

        Raises:
            RequestException: if ``globalconfig["network"]`` is neither 0
                (winhttp) nor 1 (libcurl).
        """
        # Read the setting once so one call works with one consistent value.
        backend = globalconfig["network"]
        if self._libidx == backend:
            return self._requester
        if backend == 1:
            from network.libcurl.requester import Requester
        elif backend == 0:
            from network.winhttp.requester import Requester
        else:
            raise RequestException(
                "unsupported network backend: {!r}".format(backend)
            )
        self._requester = Requester()
        self._libidx = backend

        self.headers.update({"Accept-Encoding": self._requester.Accept_Encoding})
        self.headers.update({"User-Agent": self._requester.default_UA})
        return self._requester

    def request(
        self,
        method: str,
        url: str,
        params=None,
        data=None,
        headers=None,
        proxies=None,
        json=None,
        cookies=None,
        files=None,
        auth=None,
        timeout=default_timeout,
        allow_redirects=True,
        hooks=None,
        stream=None,
        verify=False,
        cert=None,
    ):
        """Send a request with the session's headers and cookies applied.

        Per-call ``headers`` override the session defaults for this call only.
        Cookies from a Cookie header, from ``cookies`` and from the response
        are all merged into ``self.cookies``; the returned response's cookies
        are in turn extended with the session's.
        """
        requester = self.requester

        _h = self.headers.copy()
        if headers:
            _h.update(headers)
        self.cookies.update(requester._parsecookiestring(_h.get("cookie", "")))
        if cookies:
            self.cookies.update(cookies)
        response = requester.request(
            method.upper(),
            url,
            params=params,
            data=data,
            headers=_h,
            proxies=proxies,
            json=json,
            cookies=self.cookies,
            files=files,
            auth=auth,
            timeout=timeout,
            allow_redirects=allow_redirects,
            hooks=hooks,
            stream=stream,
            verify=verify,
            cert=cert,
        )
        self.cookies.update(response.cookies)
        response.cookies.update(self.cookies)
        return response

    def get(self, url, **kwargs):
        return self.request("GET", url, **kwargs)

    def post(self, url, **kwargs):
        return self.request("POST", url, **kwargs)

    def put(self, url, **kwargs):
        return self.request("PUT", url, **kwargs)

    def options(self, url, **kwargs):
        return self.request("OPTIONS", url, **kwargs)

    def patch(self, url, **kwargs):
        return self.request("PATCH", url, **kwargs)

    def delete(self, url, **kwargs):
        return self.request("DELETE", url, **kwargs)

    def head(self, url, **kwargs):
        return self.request("HEAD", url, **kwargs)
2024-04-02 15:36:52 +08:00
def request(method, url, **kwargs):
    """Send one request through a throwaway Session and return its response."""
    with Session() as one_shot:
        response = one_shot.request(method=method, url=url, **kwargs)
    return response
2024-04-02 15:36:52 +08:00
2024-01-08 23:37:00 +08:00
def session():
    """Create and return a new Session."""
    with Session() as fresh:
        # Returning from inside the block runs __exit__ before the caller
        # receives the object.
        return fresh
2024-04-02 15:36:52 +08:00
# Module-level verb shortcuts: each one calls request() with the HTTP method
# pre-filled, e.g. get(url, **kwargs) == request("GET", url, **kwargs).
get = partial(request, "GET")
post = partial(request, "POST")
put = partial(request, "PUT")
options = partial(request, "OPTIONS")
patch = partial(request, "PATCH")
delete = partial(request, "DELETE")
head = partial(request, "HEAD")