Spaces:
Sleeping
Sleeping
:zap: [Enhance] FilepathConverter: Add optional parent param to __init__
Browse files
networks/filepath_converter.py
CHANGED
@@ -34,8 +34,9 @@ WINDOWS_INVALID_FILE_PATH_NAMES = [
|
|
34 |
|
35 |
|
36 |
class FilepathConverter:
|
37 |
-
def __init__(self):
|
38 |
self.output_root = Path(__file__).parents[1] / "files"
|
|
|
39 |
|
40 |
def preprocess(self, input_string):
|
41 |
return input_string
|
@@ -63,6 +64,7 @@ class FilepathConverter:
|
|
63 |
filename = self.validate(filename)
|
64 |
filename = self.append_extension(filename)
|
65 |
|
|
|
66 |
if parent:
|
67 |
filepath = self.output_root / parent / filename
|
68 |
else:
|
@@ -75,8 +77,8 @@ class FilepathConverter:
|
|
75 |
|
76 |
|
77 |
class UrlToFilepathConverter(FilepathConverter):
|
78 |
-
def __init__(self):
|
79 |
-
super().__init__()
|
80 |
self.output_root = self.output_root / "urls"
|
81 |
|
82 |
def preprocess(self, url):
|
@@ -85,8 +87,8 @@ class UrlToFilepathConverter(FilepathConverter):
|
|
85 |
|
86 |
|
87 |
class QueryToFilepathConverter(FilepathConverter):
|
88 |
-
def __init__(self):
|
89 |
-
super().__init__()
|
90 |
self.output_root = self.output_root / "queries"
|
91 |
|
92 |
|
@@ -100,5 +102,5 @@ if __name__ == "__main__":
|
|
100 |
"https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename"
|
101 |
)
|
102 |
|
103 |
-
url_converter = UrlToFilepathConverter()
|
104 |
-
print(url_converter.convert(url
|
|
|
34 |
|
35 |
|
36 |
class FilepathConverter:
|
37 |
+
def __init__(self, parent: str = None):
|
38 |
self.output_root = Path(__file__).parents[1] / "files"
|
39 |
+
self.parent = parent
|
40 |
|
41 |
def preprocess(self, input_string):
|
42 |
return input_string
|
|
|
64 |
filename = self.validate(filename)
|
65 |
filename = self.append_extension(filename)
|
66 |
|
67 |
+
parent = parent or self.parent
|
68 |
if parent:
|
69 |
filepath = self.output_root / parent / filename
|
70 |
else:
|
|
|
77 |
|
78 |
|
79 |
class UrlToFilepathConverter(FilepathConverter):
|
80 |
+
def __init__(self, parent: str = None):
|
81 |
+
super().__init__(parent)
|
82 |
self.output_root = self.output_root / "urls"
|
83 |
|
84 |
def preprocess(self, url):
|
|
|
87 |
|
88 |
|
89 |
class QueryToFilepathConverter(FilepathConverter):
|
90 |
+
def __init__(self, parent: str = None):
|
91 |
+
super().__init__(parent)
|
92 |
self.output_root = self.output_root / "queries"
|
93 |
|
94 |
|
|
|
102 |
"https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename"
|
103 |
)
|
104 |
|
105 |
+
url_converter = UrlToFilepathConverter(parent=query)
|
106 |
+
print(url_converter.convert(url))
|
networks/google_searcher.py
CHANGED
@@ -2,6 +2,7 @@ import requests
|
|
2 |
from pathlib import Path
|
3 |
from utils.enver import enver
|
4 |
from utils.logger import logger
|
|
|
5 |
|
6 |
|
7 |
class GoogleSearcher:
|
@@ -10,7 +11,7 @@ class GoogleSearcher:
|
|
10 |
self.url = "https://www.google.com/search"
|
11 |
self.enver = enver
|
12 |
self.enver.set_envs(proxies=True)
|
13 |
-
self.
|
14 |
|
15 |
def send_request(self, result_num=10):
|
16 |
logger.note(f"Searching: [{self.query}]")
|
@@ -27,12 +28,11 @@ class GoogleSearcher:
|
|
27 |
)
|
28 |
|
29 |
def save_response(self):
|
30 |
-
|
31 |
-
if not self.
|
32 |
-
self.
|
33 |
-
|
34 |
-
|
35 |
-
with open(output_path, "wb") as wf:
|
36 |
wf.write(self.request_response.content)
|
37 |
|
38 |
def search(self, query):
|
|
|
2 |
from pathlib import Path
|
3 |
from utils.enver import enver
|
4 |
from utils.logger import logger
|
5 |
+
from networks.filepath_converter import QueryToFilepathConverter
|
6 |
|
7 |
|
8 |
class GoogleSearcher:
|
|
|
11 |
self.url = "https://www.google.com/search"
|
12 |
self.enver = enver
|
13 |
self.enver.set_envs(proxies=True)
|
14 |
+
self.filepath_converter = QueryToFilepathConverter()
|
15 |
|
16 |
def send_request(self, result_num=10):
|
17 |
logger.note(f"Searching: [{self.query}]")
|
|
|
28 |
)
|
29 |
|
30 |
def save_response(self):
|
31 |
+
self.output_path = self.filepath_converter.convert(self.query)
|
32 |
+
if not self.output_path.exists():
|
33 |
+
self.output_path.parent.mkdir(parents=True, exist_ok=True)
|
34 |
+
logger.note(f"Saving to: [{self.output_path}]")
|
35 |
+
with open(self.output_path, "wb") as wf:
|
|
|
36 |
wf.write(self.request_response.content)
|
37 |
|
38 |
def search(self, query):
|