44
55namespace Scrapegraphai \Core ;
66
7+ use Psr \Http \Client \ClientExceptionInterface ;
78use Psr \Http \Client \ClientInterface ;
89use Psr \Http \Message \RequestFactoryInterface ;
910use Psr \Http \Message \RequestInterface ;
1516use Scrapegraphai \Core \Contracts \BaseStream ;
1617use Scrapegraphai \Core \Conversion \Contracts \Converter ;
1718use Scrapegraphai \Core \Conversion \Contracts \ConverterSource ;
19+ use Scrapegraphai \Core \Exceptions \APIConnectionException ;
1820use Scrapegraphai \Core \Exceptions \APIStatusException ;
1921use Scrapegraphai \RequestOptions ;
2022
@@ -41,7 +43,7 @@ public function __construct(
4143 string $ baseUrl ,
4244 protected RequestOptions $ options = new RequestOptions ,
4345 ) {
44- assert (null !== $ this ->options ->uriFactory );
46+ assert (! is_null ( $ this ->options ->uriFactory ) );
4547 $ this ->baseUrl = $ this ->options ->uriFactory ->createUri ($ baseUrl );
4648 }
4749
@@ -67,7 +69,7 @@ public function request(
6769 // @phpstan-ignore-next-line
6870 [$ req , $ opts ] = $ this ->buildRequest (method: $ method , path: $ path , query: $ query , headers: $ headers , body: $ body , opts: $ options );
6971 ['method ' => $ method , 'path ' => $ uri , 'headers ' => $ headers ] = $ req ;
70- assert (null !== $ opts ->requestFactory );
72+ assert (! is_null ( $ opts ->requestFactory ) );
7173
7274 $ request = $ opts ->requestFactory ->createRequest ($ method , uri: $ uri );
7375 $ request = Util::withSetHeaders ($ request , headers: $ headers );
@@ -170,14 +172,63 @@ protected function followRedirect(
170172 ): RequestInterface {
171173 $ location = $ rsp ->getHeaderLine ('Location ' );
172174 if (!$ location ) {
173- throw new \ RuntimeException ( 'Redirection without Location header ' );
175+ throw new APIConnectionException ( $ req , message: 'Redirection without Location header ' );
174176 }
175177
176178 $ uri = Util::joinUri ($ req ->getUri (), path: $ location );
177179
178180 return $ req ->withUri ($ uri );
179181 }
180182
/**
 * Decides whether a failed attempt may be sent again.
 *
 * Returns false once `$retryCount` has reached `$opts->maxRetries`.
 * Otherwise returns true only for response statuses 408, 409, 429 and
 * anything >= 500. When `$rsp` is null there is no status to inspect and
 * the result is false.
 *
 * @internal
 */
protected function shouldRetry(
    RequestOptions $opts,
    int $retryCount,
    ?ResponseInterface $rsp
): bool {
    // The retry budget is already spent.
    if ($retryCount >= $opts->maxRetries) {
        return false;
    }

    $status = $rsp?->getStatusCode();

    return match (true) {
        // No response at all: nothing matches the retryable statuses.
        null === $status => false,
        $status >= 500 => true,
        // Loose comparison is kept on purpose so the check matches `==` semantics.
        default => in_array($status, [408, 409, 429]),
    };
}
202+
/**
 * Computes how many seconds to wait before the next retry attempt.
 *
 * A `Retry-After` response header takes precedence when present:
 *  - a numeric value is used as a number of seconds (clamped to >= 0);
 *  - any other value is parsed as a date, and the time remaining until
 *    that date is used (0 if the date is already in the past).
 *
 * Without a usable header the delay is an exponential backoff:
 * `initialRetryDelay * 2^retryCount`, reduced by up to 25% of random
 * jitter and capped at `maxRetryDelay`.
 *
 * @internal
 *
 * @return float seconds to sleep, never negative
 */
protected function retryDelay(
    RequestOptions $opts,
    int $retryCount,
    ?ResponseInterface $rsp
): float {
    $header = trim($rsp?->getHeaderLine('retry-after') ?? '');

    // Compare against '' rather than using empty(): "0" is a valid
    // Retry-After value meaning "retry immediately".
    if ('' !== $header) {
        if (is_numeric($header)) {
            return max(0.0, (float) $header);
        }

        try {
            $date = new \DateTimeImmutable($header);

            // Time remaining until the requested instant; a date in the
            // past means no wait at all.
            return (float) max(0, $date->getTimestamp() - time());
        } catch (\Exception) {
            // Unparseable header: fall through to the computed backoff.
            // \Exception covers both the generic exception thrown before
            // PHP 8.3 and \DateMalformedStringException from 8.3 onwards.
        }
    }

    // Doubles on every attempt, so the first retry (retryCount 0) waits
    // roughly initialRetryDelay instead of zero.
    $scale = 2 ** $retryCount;
    // Random factor in [0.75, 1.0] so concurrent clients do not retry in lockstep.
    $jitter = 1 - (0.25 * mt_rand() / mt_getrandmax());
    // NOTE(review): assumes initialRetryDelay / maxRetryDelay are numeric seconds — confirm in RequestOptions.
    $naive = $opts->initialRetryDelay * $scale * $jitter;

    return (float) max(0.0, min($naive, $opts->maxRetryDelay));
}
231+
181232 /**
182233 * @internal
183234 *
@@ -194,25 +245,40 @@ protected function sendRequest(
194245 assert (null !== $ opts ->streamFactory && null !== $ opts ->transporter );
195246
196247 $ req = Util::withSetBody ($ opts ->streamFactory , req: $ req , body: $ data );
197- $ rsp = $ opts ->transporter ->sendRequest ($ req );
198- $ code = $ rsp ->getStatusCode ();
248+
249+ $ rsp = null ;
250+ $ err = null ;
251+
252+ try {
253+ $ rsp = $ opts ->transporter ->sendRequest ($ req );
254+ } catch (ClientExceptionInterface $ e ) {
255+ $ err = $ e ;
256+ }
257+
258+ $ code = $ rsp ?->getStatusCode();
199259
200260 if ($ code >= 300 && $ code < 400 ) {
261+ assert (!is_null ($ rsp ));
262+
201263 if ($ redirectCount >= 20 ) {
202- throw new \ RuntimeException ( 'Maximum redirects exceeded ' );
264+ throw new APIConnectionException ( $ req , message: 'Maximum redirects exceeded ' );
203265 }
204266
205267 $ req = $ this ->followRedirect ($ rsp , req: $ req );
206268
207269 return $ this ->sendRequest ($ opts , req: $ req , data: $ data , retryCount: $ retryCount , redirectCount: ++$ redirectCount );
208270 }
209271
210- if ($ code >= 400 && $ code < 500 ) {
211- throw APIStatusException::from (request: $ req , response: $ rsp );
212- }
272+ if ($ code >= 400 || is_null ($ rsp )) {
273+ if ($ this ->shouldRetry ($ opts , retryCount: $ retryCount , rsp: $ rsp )) {
274+ $ exn = is_null ($ rsp ) ? new APIConnectionException ($ req , previous: $ err ) : APIStatusException::from (request: $ req , response: $ rsp );
275+
276+ throw $ exn ;
277+ }
213278
214- if ($ code >= 500 && $ retryCount < $ opts ->maxRetries ) {
215- usleep ((int ) $ opts ->initialRetryDelay );
279+ $ seconds = $ this ->retryDelay ($ opts , retryCount: $ redirectCount , rsp: $ rsp );
280+ $ floor = floor ($ seconds );
281+ time_nanosleep ((int ) $ floor , nanoseconds: (int ) ($ seconds - $ floor ) * 10 ** 9 );
216282
217283 return $ this ->sendRequest ($ opts , req: $ req , data: $ data , retryCount: ++$ retryCount , redirectCount: $ redirectCount );
218284 }
0 commit comments