@@ -534,14 +534,16 @@ void handleModelsRequest(HttpRequest& request, const char* modelPath) {
 }
 
 static void server(AppInferenceContext *context) {
-    NnSocket serverSocket(createServerSocket(context->args->port));
+    NnSocket serverSocket(createServerSocket(context->args->host, context->args->port));
 
     TokenizerChatStops stops(context->tokenizer);
     ChatTemplateGenerator templateGenerator(context->args->chatTemplateType, context->tokenizer->chatTemplate, stops.stops[0]);
     EosDetector eosDetector(stops.nStops, context->tokenizer->eosTokenIds.data(), stops.stops, stops.maxStopLength, stops.maxStopLength);
     ApiServer api(context->inference, context->tokenizer, context->sampler, context->args, context->header, &eosDetector, &templateGenerator);
 
-    printf("Server URL: http://127.0.0.1:%d/v1/\n", context->args->port);
+    if (strcmp(context->args->host, "0.0.0.0") == 0 ||
+        strcmp(context->args->host, "127.0.0.1") == 0)
+        printf("Server URL: http://localhost:%d/v1/\n", context->args->port);
 
     std::vector<Route> routes = {
         {
@@ -577,7 +579,7 @@ static void server(AppInferenceContext *context) {
 #endif
 
 void usage() {
-    fprintf(stderr, "Usage: %s {--model <path>} {--tokenizer <path>} [--port <p>]\n", EXECUTABLE_NAME);
+    fprintf(stderr, "Usage: %s {--model <path>} {--tokenizer <path>} [--host <addr>] [--port <p>]\n", EXECUTABLE_NAME);
     fprintf(stderr, "    [--buffer-float-type {f32|f16|q40|q80}]\n");
     fprintf(stderr, "    [--weights-float-type {f32|f16|q40|q80}]\n");
     fprintf(stderr, "    [--max-seq-len <max>]\n");
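
The hunk only changes the call site; createServerSocket itself is defined elsewhere in the repository and is not shown here. As context, a minimal, hypothetical sketch of a host-aware IPv4 server socket, assuming a plain POSIX backend, could look like the code below. The name createServerSocketSketch, the includes, and the error handling are illustrative only and do not reflect the project's actual implementation.

    // Hypothetical sketch, not the project's real createServerSocket:
    // bind a listening TCP socket to a caller-supplied host instead of
    // the implicit 127.0.0.1 / INADDR_ANY default.
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <unistd.h>
    #include <cstring>
    #include <stdexcept>

    static int createServerSocketSketch(const char *host, int port) {
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        if (fd < 0)
            throw std::runtime_error("Cannot create socket");

        int reuse = 1;
        setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse));

        sockaddr_in addr;
        std::memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_port = htons(port);
        // Translate the textual host ("0.0.0.0", "127.0.0.1", ...) into a binary address.
        if (inet_pton(AF_INET, host, &addr.sin_addr) != 1) {
            close(fd);
            throw std::runtime_error("Invalid host address");
        }
        if (bind(fd, (sockaddr *)&addr, sizeof(addr)) < 0 || listen(fd, SOMAXCONN) < 0) {
            close(fd);
            throw std::runtime_error("Cannot bind/listen on the requested host");
        }
        return fd;
    }

Under that assumption, --host 127.0.0.1 binds the API to loopback only, while --host 0.0.0.0 listens on every interface; that is why the patched server() prints the http://localhost URL hint only for those two well-known values.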